Copepod Collection

Copepods were collected at approximately weekly intervals from Lake Champlain (Burlington Fishing Pier). Plankton was collected from the top 3 meters using a 250 um mesh net.

# # Lake Champlain near Burlington, VT
# siteNumber = "04294500"
# ChamplainInfo = readNWISsite(siteNumber)
# parameterCd = "00010"
# startDate = "2023-01-01"
# endDate = "2024-5-20"
# #statCd = c("00001", "00002","00003", "00011") # 1 - max, 2 - min, 3 = mean
# 
# # Constructs the URL for the data wanted then downloads the data
# url = constructNWISURL(siteNumbers = siteNumber, parameterCd = parameterCd,
#                        startDate = startDate, endDate = endDate, service = "uv")
# 
# raw_temps = importWaterML1(url, asDateTime = T) %>%
#   mutate("date" = as.Date(dateTime),
#          "hour" = hour(dateTime)) %>%
#   select(dateTime, tz_cd, date, hour, degC = X_00010_00000)
# 
# temp_data =  raw_temps %>%
#   select(date, hour, "temp" = degC)
# 
# write.csv(temp_data, file = "./Output/Data/champlain_temps.csv", row.names = F)

Collections began in late May 2023. Several gaps are present, but collections have continued at roughly weekly intervals since then. Copepods from 48 collections were used to make a total of 1312 thermal limit measurements. Over this time period, collection temperatures ranged from 2.5 to 26.5°C.

There is substantial variation in thermal limits across the species collected. There is also some degree of variation within the species, with thermal limits increasing slightly during the summer.

## Daily values for the period examined by dataset
collection_conditions = temp_data %>%
  ungroup() %>% 
  group_by(date) %>% 
  summarise(mean_temp = mean(temp),
            med_temp = median(temp),
            var_temp = var(temp), 
            min_temp = min(temp), 
            max_temp = max(temp)) %>% 
  mutate("range_temp" = max_temp - min_temp,
         date = as.Date(date)) %>% 
  ungroup() %>%  
  filter(date >= (min(as.Date(full_data$collection_date)) - 7)) %>% 
  left_join(unique(select(full_data, collection_date, collection_temp)), by = join_by(date == collection_date))

## Mean female thermal limits for each species, grouped by collection
species_summaries = full_data %>%  
  #filter(sex == "female") %>% 
  group_by(sp_name, collection_date, collection_temp) %>%  
  summarise("mean_ctmax" = mean(ctmax),
            "sample_size" = n(),
            "ctmax_st_err" = (sd(ctmax) / sqrt(sample_size)),
            "ctmax_var" = var(ctmax), 
            "mean_size" = mean(size),
            "size_st_err" = (sd(size) / sqrt(sample_size)),
            "size_var" = var(size)) %>%  
  ungroup() %>% 
  complete(sp_name, collection_date) %>% 
  arrange(desc(sample_size))

adult_summaries = full_data %>%  
  filter(sex == "female") %>% 
  group_by(sp_name, collection_date, collection_temp) %>%  
  summarise("mean_ctmax" = mean(ctmax),
            "sample_size" = n(),
            "ctmax_st_err" = (sd(ctmax) / sqrt(sample_size)),
            "ctmax_var" = var(ctmax), 
            "mean_size" = mean(size),
            "size_st_err" = (sd(size) / sqrt(sample_size)),
            "size_var" = var(size)) %>%  
  ungroup() %>% 
  complete(sp_name, collection_date) %>% 
  arrange(desc(sample_size))


tseries_data = full_data %>% 
  mutate(sp_name = fct_reorder(sp_name, ctmax, .desc = T))

ggplot() + 
  geom_line(data = collection_conditions, 
            aes(x = as.Date(date), y = mean_temp),
            colour = "black", 
            linewidth = 1) + 
  geom_point(data = filter(tseries_data, species != "osphranticum_labronectum"), 
             aes(x = as.Date(collection_date), y = ctmax, colour = sp_name),
             size = 1.5, shape = 16, alpha = 0.9,
             position = position_jitter(width = 0, height = 0)) + 
    geom_point(data = filter(tseries_data, species == "osphranticum_labronectum"), 
             aes(x = as.Date(collection_date), y = ctmax, colour = sp_name),
             size = 2, shape = 16, alpha = 0.9,
             position = position_jitter(width = 0, height = 0)) + 
  scale_colour_manual(values = species_cols) + 
  guides(colour = guide_legend(override.aes = list(alpha = 1, size = 5))) + 
  labs(x = "Date", 
       y = "Temperature (°C)", 
       colour = "Species",
       size = "Sample Size") + 
  theme_matt() + 
  theme(legend.position = "right", 
        axis.text.x = element_text(angle = 320, hjust = 0, vjust = 0.5))


lake_temps = ggplot() + 
  geom_line(data = collection_conditions, 
            aes(x = as.Date(date), y = mean_temp),
            colour = "black", 
            linewidth = 1) + 
  labs(x = "Date", 
       y = "Temperature (°C)", 
       colour = "Species",
       size = "Sample Size") + 
  theme_matt() + 
  theme(legend.position = "right")

Temperatures observed at the time of collection closely resembled the maximum daily temperature from the temperature sensor data. Maximum temperature was used as a proxy instead of mean temperature as collections were usually made during afternoons or early evenings, just following the warmest part of the day.

collection_conditions %>% 
  drop_na(collection_temp) %>%  
  ggplot(aes(x = max_temp, y = collection_temp)) + 
  geom_abline(intercept = 0, slope = 1,
              linewidth = 1, colour = "grey") + 
  geom_point(size = 3) +
  scale_x_continuous(breaks = c(5,15,25)) + 
  scale_y_continuous(breaks = c(5,15,25)) + 
  labs(x = "Max. Temp. from Sensor (°C)",
       y = "Collection Temp. (°C)") + 
  theme_matt()

Size also varied, but primarily between rather than within species.

ggplot() + 
  geom_vline(data = unique(select(full_data, collection_date)), 
             aes(xintercept = as.Date(collection_date)),
             colour = "grey90",
             linewidth = 1) + 
  geom_line(data = collection_conditions, 
            aes(x = as.Date(date), y = mean_temp),
            colour = "black", 
            linewidth = 2) + 
  # geom_errorbar(data = species_summaries,
  #               aes(x = as.Date(collection_date), 
  #                   ymin = mean_ctmax - ctmax_st_err, ymax = mean_ctmax + ctmax_st_err,
  #                   colour = sp_name),
  #               position = position_dodge(width = 1),
  #               width = 5, linewidth = 1) + 
  geom_point(data = adult_summaries, 
             aes(x = as.Date(collection_date), y = mean_size * 40, colour = sp_name, size = sample_size),
             position = position_dodge(width = 1)) + 
  scale_colour_manual(values = species_cols) + 
  scale_y_continuous(
    name = "Temperature", # Features of the first axis
    sec.axis = sec_axis(~./40, name="Prosome Length (mm)"), # Add a second axis and specify its features
    breaks = c(0,5,10,15,20,25,30)
  ) + 
  labs(x = "Date", 
       y = "Temperature (°C)", 
       colour = "Species") + 
  theme_matt() + 
  theme(legend.position = "right")

sample_dates_plot = full_data %>%  
  filter(sp_name != "Osphranticum labronectum") %>% 
  mutate(sp_name = as.factor(sp_name),
         sp_name = fct_reorder(sp_name, ctmax)) %>% 
  ggplot(aes(x = lubridate::as_date(collection_date), 
             y = sp_name, fill = sp_name)) + 
  # geom_vline(xintercept = as_date(
  #   c("2023-05-01",
  #     "2023-09-01",
  #     "2024-01-01",
  #     "2024-05-01")),
  #   colour = "grey",
  #   linewidth = 1) + 
  geom_density_ridges(bandwidth = 30,
                      jittered_points = TRUE, 
                      point_shape = 21,
                      point_size = 1,
                      point_colour = "grey30",
                      point_alpha = 0.8,
                      alpha = 0.8,
                      position = position_points_jitter(
                        height = 0.1, width = 0)) + 
  scale_fill_manual(values = species_cols) + 
  scale_x_date(date_breaks = "3 months",
               date_labels = "%b") + 
  coord_cartesian(xlim = lubridate::as_date(c("2023-06-08", "2024-05-08"))) + 
  labs(x = "Day of Year", 
       y = "Species") + 
  theme_matt() + 
  #theme_ridges(grid = T) + 
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 270, hjust = 0, vjust = 0.5))

The samples captured the broad seasonal changes in calanoid copepod community composition in the lake. We note, however, that rare species (e.g. Senecella and Limnocalanus) were often preferentially sampled, so are over-represented in the data set.

Throughout the season, the prevalence of various unidentified pathogens also varied, with very little infection observed during the Winter and Spring.

pathogen_cols = c("no" = "grey95", "cloudy" = "honeydew3", "spot" = "antiquewhite3", "other" = "tomato3")

full_data %>% 
  select(collection_date, dev_eggs, pathogen, lipids, sp_name, sex) %>% 
  group_by() %>% 
  filter(sex != "juvenile") %>% 
  group_by(collection_date) %>% 
  count(pathogen) %>% 
  filter(pathogen != "uncertain") %>% 
  pivot_wider(id_cols = "collection_date", 
              names_from = pathogen, 
              values_from = n,
              values_fill = 0) %>% 
  mutate(total = sum(no, cloudy, spot, other)) %>% 
  pivot_longer(cols = c(no, cloudy, spot, other),
               names_to = "pathogen", 
               values_to = "count") %>% 
  mutate(percent = count/total,
         collection_date = lubridate::as_date(collection_date),
         pathogen = fct_relevel(pathogen, "no", "cloudy", "spot", "other")) %>% 
  ggplot(aes(x = collection_date, y = percent, fill = pathogen)) + 
  geom_area() + 
  scale_fill_manual(values = pathogen_cols) + 
  scale_y_continuous(breaks = c(0,1)) + 
  labs(x = "Collection Date", 
       y = "Proportion", 
       fill = "Pathogen") + 
  theme_minimal(base_size = 20) + 
  theme(panel.grid = element_blank(),
        axis.ticks = element_line())

The transparent bodies of these copepods also allowed us to examine seasonal patterns in lipid reserves and in the production of eggs. Maturing oocytes are visible in female copepods before they are released. There was no strong seasonal cycle in the production of these eggs in any species, and instead, females were reproductively active throughout their respective seasons of occurence.

dev_eggs_cols = c("no" = "grey95", "yes" = "lightblue3")

full_data %>% 
  select(collection_date, dev_eggs, pathogen, lipids, sp_name, sex) %>% 
  group_by(sp_name) %>% 
  filter(sex != "juvenile") %>% 
  group_by(sp_name, collection_date) %>% 
  count(dev_eggs) %>% 
  filter(dev_eggs != "uncertain") %>% 
  pivot_wider(id_cols = c("collection_date", "sp_name"), 
              names_from = dev_eggs, 
              values_from = n,
              values_fill = 0) %>% 
  mutate(total = sum(no, yes)) %>% 
  pivot_longer(cols = c(no, yes),
               names_to = "dev_eggs", 
               values_to = "count") %>% 
  mutate(percent = count/total,
         collection_date = lubridate::as_date(collection_date),
         dev_eggs = fct_relevel(dev_eggs, "no", "yes")) %>% 
  ungroup() %>% 
  complete(collection_date, nesting(sp_name, dev_eggs), fill = list(percent = 1)) %>% 
  mutate(percent = if_else(is.na(total) & dev_eggs == "yes", 0, percent)) %>% 
  ggplot(aes(x = collection_date, y = percent, fill = dev_eggs)) + 
  facet_wrap(sp_name~., ncol = 1) + 
  geom_area() + 
  scale_fill_manual(values = dev_eggs_cols) + 
  scale_y_continuous(breaks = c(0,1)) + 
  labs(x = "Collection Date", 
       y = "Proportion", 
       fill = "Developing \nEggs") + 
  theme_minimal(base_size = 20) + 
  theme(panel.grid = element_blank(),
        axis.ticks = element_line())

The presence of lipids varied across species, with only L. minutus, L. sicilis, and Limnocalanus regularly possessing lipid stores.

lipid_cols = c("no" = "grey95", "yes" = "sienna2")

full_data %>% 
  select(collection_date, dev_eggs, pathogen, lipids, sp_name, sex) %>% 
  group_by(sp_name) %>% 
  filter(sex != "juvenile") %>% 
  group_by(sp_name, collection_date) %>% 
  count(lipids) %>% 
  filter(lipids != "uncertain") %>% 
  pivot_wider(id_cols = c("collection_date", "sp_name"), 
              names_from = lipids, 
              values_from = n,
              values_fill = 0) %>% 
  mutate(total = sum(no, yes)) %>% 
  pivot_longer(cols = c(no, yes),
               names_to = "lipids", 
               values_to = "count") %>% 
  mutate(percent = count/total,
         collection_date = lubridate::as_date(collection_date),
         lipids = fct_relevel(lipids, "no", "yes")) %>% 
  ungroup() %>% 
  complete(collection_date, nesting(sp_name, lipids), fill = list(percent = 1)) %>% 
  mutate(percent = if_else(is.na(total) & lipids == "yes", 0, percent)) %>% 
  ggplot(aes(x = collection_date, y = percent, fill = lipids)) + 
  facet_wrap(sp_name~., ncol = 1) + 
  geom_area() + 
  scale_fill_manual(values = lipid_cols) + 
  scale_y_continuous(breaks = c(0,1)) + 
  labs(x = "Collection Date", 
       y = "Proportion", 
       fill = "Lipids\nPresent") + 
  theme_minimal(base_size = 20) + 
  theme(panel.grid = element_blank(),
        axis.ticks = element_line())

Temperature Variability

Lake Champlain is highly seasonal, with both average temperatures and temperature variability changing throughout the year. These patterns in the experienced thermal environment may drive the observed variation in copepod thermal limits. However, the time period affecting copepod thermal limits is unknown. Depending the on the duration of time considered, there are large changes in the experienced environment, in particular regarding the temperature range and variance. Consider for example three time periods: the day of collection, one week prior to collection, and four weeks prior to collection. While the overall pattern is similar, we can see that, unsurprisingly, considering longer periods of time results in larger ranges and slightly changes the pattern of variance experienced.

## Defining the function to get predictor values for periods of different lengths
get_predictors = function(daily_values, raw_temp, n_days){
  prefix = str_replace_all(xfun::numbers_to_words(n_days), pattern = " ", replacement = "-")
  
  mean_values = daily_values %>% 
    ungroup() %>% 
    mutate(mean_max = slide_vec(.x = max_temp, .f = mean, .before = n_days, .complete = T),
           mean_min = slide_vec(.x = min_temp, .f = mean, .before = n_days, .complete = T),
           mean_range = slide_vec(.x = range_temp, .f = mean, .before = n_days, .complete = T)) %>% 
    select(date, mean_max, mean_min, mean_range) %>% 
    rename_with( ~ paste(prefix, "day", .x, sep = "_"), .cols = c(-date))
  
  period_values = raw_temp %>% 
    mutate(mean = slide_index_mean(temp, i = date, before = days(n_days), 
                                   na_rm = T),
           max = slide_index_max(temp, i = date, before = days(n_days), 
                                 na_rm = T),
           min = slide_index_min(temp, i = date, before = days(n_days),
                                 na_rm = T),
           med = slide_index_dbl(temp, .i = date, .before = days(n_days), 
                                 na_rm = T, .f = median),
           var = slide_index_dbl(temp, .i = date, .before = days(n_days), 
                                 .f = var),
           range = max - min) %>%  
    select(-temp) %>%  
    distinct() %>% 
    rename_with( ~ paste(prefix, "day", .x, sep = "_"), .cols = c(-date))%>% 
    inner_join(mean_values, by = c("date")) %>%  
    drop_na()
  
  return(period_values)
}

Organisms are unlikely to acclimate instantaneously to changes in temperature. To explore the potential temporal window these copepods are responding to, we examined the correlation between thermal limits and summaries of the thermal environment over different periods of time. For each species (inclusive of all sexes and stages), we examined the correlation between CTmax and one of nine representations of the thermal environment calculated for periods of time from 1 to 60 days before collection. These parameters include the overall maximum, minimum, median, and mean temperature for the period of time, the temperature range and variance during this time, and the mean daily temperature maximum, minimum, and range. We also examined the correlation between CTmax and the temperature recorded at the time of collection.

Shown below are the correlation coefficients for these relationships. Each facet shows the relationship for a different parameter, plotted against the duration of the time period before collection.

corr_vals %>% 
  mutate(parameter = fct_relevel(parameter, c("min", "max", "range",
                                              "mean", "med", "var",
                                              "mean_min", "mean_max", "mean_range"))) %>% 
  ggplot(aes(x = duration, y = correlation, colour = sp_name)) + 
  facet_wrap(.~parameter) + 
  geom_hline(yintercept = 0) + 
  geom_point(size = 0.9) + 
  geom_line(linewidth = 1.5) + 
  scale_colour_manual(values = species_cols) + 
  labs(x = "Duration (days)",
       y = "Correlation", 
       colour = "Species") + 
  theme_matt_facets()

This table contains the top three factors for each species (based on correlation coefficient).

Shown here is a graphical summary of the duration of the best predictors for each species. Note that for the two Leptodiaptomids, collection temperature had the largest correlation coefficient so duration is zero. This representation highlights that there is variation across the community not only in the potential driver (e.g. minimum vs. maximum temperatures) but also in the duration of time. This variation is not grouped by season (the winter and summer communities both have representative species apparently responding to short and long durations).

duration_plot = corr_vals %>%  
  filter(sig == "Sig.") %>% 
  drop_na(correlation) %>% 
  group_by(sp_name) %>%
  arrange(desc(correlation)) %>% 
  slice_head(n = 1) %>% 
  ungroup() %>% 
  mutate("num" = row_number(), 
         sp_name = fct_reorder(sp_name, duration, .fun = mean, .desc = T)) %>% 
  arrange(sp_name) %>% 
  select("Species" = sp_name, "Predictor" = parameter, "Duration" = duration, "Correlation" = correlation, num) %>% 
  ggplot(aes(x = Species, y = Duration, fill = Predictor, group = num)) + 
  geom_bar(stat = "identity", width = 0.5, position = position_dodge(width = 0.6),
           colour = "black") + 
  scale_fill_manual(values = c("coll_temp" = "black", "max" = "white", "min" = "grey")) + 
  labs(x = "", 
       y = "Duration \n(days)") + 
  theme_matt() + 
  theme(axis.text.x = element_blank())
correlation_coef_plot = corr_vals %>%  
  filter(sig == "Sig." | parameter == "coll_temp") %>% 
  drop_na(correlation) %>% 
  group_by(sp_name) %>%
  filter(parameter == "coll_temp" | correlation == max(correlation)) %>% 
  arrange(sp_name, parameter) %>% 
  mutate("num" = row_number()) %>% 
  select("Species" = sp_name, "Predictor" = parameter, "Duration" = duration, "Correlation" = correlation, num) %>% 
  mutate(Predictor = if_else(Predictor == "coll_temp", Predictor, "best")) %>% 
  ungroup() %>% 
  mutate(Species = fct_reorder(Species, Duration, .fun = max, .desc = T)) %>% 
  ggplot(aes(x = Species, y = Correlation, fill = Predictor, group = num)) + 
  geom_bar(stat = "identity", width = 0.5, position = position_dodge(width = 0.6),
           colour = "black") + 
  labs(y = "Correlation \nCoefficient",
       fill = "Correlate") +
  scale_fill_manual(values = c("coll_temp" = "black", "best" = "white")) + 
  scale_y_continuous(breaks = c(0, 1), limits = c(0,1)) +
  theme_matt() + 
  theme(axis.text.x = element_text(angle = 300, hjust = 0, vjust = 0.5))

ggarrange(duration_plot, correlation_coef_plot, nrow = 2, legend = "right", labels = "AUTO",
          heights = c(0.4, 0.6))

Trait Variation

Shown below are the clutch size distributions for the three diaptomiid species, which produce egg sacs that allow for easy quantification of fecundity.

full_data %>%  
  drop_na(fecundity) %>%  
  ggplot(aes(x = fecundity, fill = sp_name_sub)) + 
  facet_wrap(.~sp_name_sub, ncol = 1) + 
  geom_histogram(binwidth = 2) + 
  scale_fill_manual(values = species_cols) + 
  labs(x = "Fecundity (# Eggs)") +
  theme_matt_facets() + 
  theme(legend.position = "none")

One of the main aims of this project is to examine the patterns and processes driving variation in upper thermal limits across these species of copepods.

Variation with temperature

We expect one of the primary drivers of copepod thermal limits to be temperature, as individuals acclimate to seasonal changes. Shown below are the seasonal patterns of when copepods were included in CTmax measurements (a proxy for the season of occurrence), and thermal limits for each species plotted against the temperature at the time of collection. We generally see an increase in thermal limits with increasing collection temperature.

sp_ctmax_temp = full_data %>% 
  filter(sp_name != "Osphranticum labronectum") %>% 
  mutate(sp_name = as.factor(sp_name),
         sp_name = fct_reorder(sp_name, ctmax, .desc = T)) %>% 
  ggplot(aes(x = collection_temp, y = ctmax, colour = sp_name)) + 
  facet_wrap(sp_name~.) + 
  geom_smooth(method = "lm", se = F, linewidth = 1.5, colour = "grey30") + 
  geom_point(size = 2, alpha = 0.4) + 
  labs(x = "Collection Temp. (°C)", 
       y = "CTmax (°C)") + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "none")

The interaction between seasonal changes in temperature and the acclimation of thermal limits likely affects vulnerability of each species to warming. Shown below are warming tolerance values for each species, calculated as the difference between individual CTmax and the temperature at the time of collection. All species maintained some degree of buffer between environmental temperatures and upper thermal limits, but L. minutus appears to approach its upper thermal limit during the warmest collections during the summer.

Also shown below is the relationship between fecundity (the number of eggs contained in a clutch) for the three diaptomid species. For the two Leptodiaptomus species, there is no relationship between clutch size and temperature, while there appears to be a general increase in clutch size with temperature in the Skistodiaptomus species.


wt_temp = full_data %>% 
  filter(sp_name != "Osphranticum labronectum") %>% 
  ggplot(aes(x = collection_temp, y = warming_tol, colour = sp_name)) + 
  geom_point(size = 3,
             alpha = 0.3) + 
  geom_smooth(method = "lm", linewidth = 3) +
  labs(x = "Collection Temperature (°C)", 
       y = "Warming Tolerance (°C)",
       colour = "Species")  + 
  ylim(0,30) + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "none")

eggs_temp = full_data %>% 
  filter(sp_name != "Osphranticum labronectum") %>% 
  ggplot(aes(x = collection_temp, y = fecundity, colour = sp_name)) + 
  geom_point(size = 3,
             alpha = 0.3) + 
  geom_smooth(method = "lm", linewidth = 3) +
  labs(x = "Collection Temperature (°C)", 
       y = "Fecundity (# Eggs)",
       colour = "Species")  + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "right")

ggarrange(wt_temp, eggs_temp, 
          common.legend = T, legend = "right", labels = "AUTO")

# Copepods spent several days in lab during experiments. Shown below are the CTmax residuals (taken from a model of CTmax against collection temperature) plotted against the time spent in lab before measurements were made. Individual regressions are shown for the residuals against days in lab for each collection. We can see clearly that thermal limits are fairly stable over time. 


# ggplot(ctmax_resids, aes(x = days_in_lab, y = resids, colour = sp_name, group = collection_date)) + 
#   facet_wrap(sp_name~.) + 
#   geom_point(size = 4, alpha = 0.5) + 
#   geom_smooth(method = "lm", se = F, linewidth = 1) + 
#   #scale_x_continuous(breaks = c(0:5)) + 
#   labs(x = "Days in lab", 
#        y = "CTmax Residuals") + 
#   scale_colour_manual(values = species_cols) + 
#   theme_matt_facets() + 
#   theme(legend.position = "none")

model_data = full_data %>%  
  drop_na(size, ctmax) %>%  
  filter(sp_name != "Osphranticum labronectum") %>% 
  mutate(temp_cent = scale(collection_temp, center = T, scale = F),
         size_cent = scale(size, center = T, scale = F))

minimal.model = lme4::lmer(data = model_data,
                           ctmax ~ sp_name + sex + temp_cent +
                             (1|days_in_lab))

full.model = lme4::lmer(data = model_data,
                        ctmax ~ sp_name*sex*temp_cent +
                          (1|days_in_lab))

drop1(full.model, test = "Chisq")
## Single term deletions
## 
## Model:
## ctmax ~ sp_name * sex * temp_cent + (1 | days_in_lab)
##                       npar    AIC    LRT Pr(Chi)  
## <none>                     5280.1                 
## sp_name:sex:temp_cent   10 5281.0 20.887  0.0219 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

performance::test_performance(minimal.model, full.model)
## Name          |   Model |    BF | df | df_diff |   Chi2 |      p
## ----------------------------------------------------------------
## minimal.model | lmerMod |       | 11 |         |        |       
## full.model    | lmerMod | 0.001 | 38 |   27.00 | 180.70 | < .001
## Models were detected as nested (in terms of fixed parameters) and are compared in sequential order.
performance::check_model(full.model)


car::Anova(full.model, type = "III")
## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: ctmax
##                          Chisq Df Pr(>Chisq)    
## (Intercept)           4619.014  1  < 2.2e-16 ***
## sp_name                344.724  5  < 2.2e-16 ***
## sex                     51.233  2  7.495e-12 ***
## temp_cent               53.731  1  2.299e-13 ***
## sp_name:sex             10.611 10    0.38866    
## sp_name:temp_cent       39.512  5  1.873e-07 ***
## sex:temp_cent           31.961  2  1.147e-07 ***
## sp_name:sex:temp_cent   20.616 10    0.02393 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

sp_ctmax = emmeans::emmeans(full.model, specs = "sp_name") %>% 
  data.frame() %>% 
  select(sp_name, "species_ctmax" = emmean)

model_coefs = emmeans::emtrends(full.model, var = "temp_cent", specs = "sp_name") %>% 
  data.frame() %>% 
  inner_join(sp_ctmax) 

ctmax_resids = model_data %>% 
  mutate(resids = residuals(full.model))

#write.csv(model_coefs, "Output/Data/ARR_data.csv")
arr_combined = synth_arr %>%
  filter(measure == "upper" & mean_lim > 20) %>% 
  select("group" = genus, arr, mean_lim) %>% 
  mutate("dataset" = "synthesis") %>% 
  bind_rows(
    select(model_coefs, "group" = sp_name, 'arr' = temp_cent.trend, 'mean_lim' = species_ctmax)
  ) %>% 
  mutate(dataset = if_else(is.na(dataset), "new data", "synthesis"),
         group = fct_reorder(group, arr, .desc = T))


ggplot(arr_combined, aes(x = mean_lim, y = arr)) + 
  geom_smooth(method = "lm", se = F, 
              linewidth = 2, colour = "grey30") + 
  geom_point(data = filter(arr_combined, dataset != "new data"), 
             size = 3, colour = "black", shape = 1, stroke = 1.4) + 
  geom_point(data = filter(arr_combined, dataset == "new data"),
             aes(colour = group), 
             size = 4.5) + 
  scale_colour_manual(values = species_cols) + 
  labs(x = "Thermal Limit", 
       y = "ARR", 
       colour = "Species") +
  theme_matt() + 
  theme(legend.position = "right")

Sex and stage variation in thermal limits

Previous sections have generally lumped juvenile, female, and male individuals together. There may be important stage- or sex-specific differences in CTmax though. For all species but Osphranticum, we have measurements for individuals in different stages and of different sexes.

sex_sample_sizes = full_data %>%  
  group_by(sp_name, sex) %>%  
  summarise(num = n()) %>%  
  pivot_wider(id_cols = sp_name,
              names_from = sex, 
              values_from = num,
              values_fill = 0) %>% 
  select("Species" = sp_name, "Juvenile" = juvenile, "Female" = female, "Male" = male)

knitr::kable(sex_sample_sizes, align = "c")
Species Juvenile Female Male
Epischura lacustris 37 45 20
Leptodiaptomus minutus 12 273 39
Leptodiaptomus sicilis 31 356 95
Limnocalanus macrurus 4 43 39
Osphranticum labronectum 0 1 0
Senecella calanoides 13 21 8
Skistodiaptomus sp 15 232 28

Across group comparisons show that there are generally no differences in thermal limits (represented here as the residuals from a CTmax ~ collection_temp x species linear regression), with the exception of Senecella males, which may have lower thermal limits (although sample sizes are very small in this group).

# ctmax_resids %>% 
#   filter(sp_name != "Osphranticum labronectum") %>% 
#   ggplot(aes(x = sex, y = resids, colour = sp_name)) + 
#   facet_wrap(sp_name~.) + 
#   geom_jitter(width = 0.1, alpha = 0.5) + 
#   geom_boxplot(width = 0.4, fill = NA, colour = "black", 
#                linewidth = 1, outlier.colour = NA) + 
#   scale_colour_manual(values = species_cols) + 
#   theme_matt_facets()
model2_data = model_data %>% 
  filter(sex == "female", 
         pathogen != "uncertain", 
         dev_eggs != "uncertain", 
         lipids != "uncertain") %>% 
  mutate(pathogens = fct_relevel(pathogen, "no", "spot", "cloudy", "other"))

other_factor_model = lmer(data = model2_data, 
                          ctmax~sp_name * collection_temp + dev_eggs + pathogen + lipids + (1|days_in_lab))

drop1(other_factor_model, scope = ~., test = "Chisq")
## Single term deletions
## 
## Model:
## ctmax ~ sp_name * collection_temp + dev_eggs + pathogen + lipids + 
##     (1 | days_in_lab)
##                         npar    AIC     LRT   Pr(Chi)    
## <none>                       3601.6                      
## sp_name                    5 3804.2 212.619 < 2.2e-16 ***
## collection_temp            0 3601.6   0.000              
## dev_eggs                   1 3602.0   2.391    0.1220    
## pathogen                   3 3629.4  33.832 2.149e-07 ***
## lipids                     1 3600.0   0.416    0.5189    
## sp_name:collection_temp    5 3635.7  44.139 2.171e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

reduced_factors_model = lmer(data = model2_data, 
                          ctmax~sp_name * collection_temp + pathogen + (1|days_in_lab))

performance::check_model(reduced_factors_model)


car::Anova(reduced_factors_model, type = "III")
## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: ctmax
##                           Chisq Df Pr(>Chisq)    
## (Intercept)             570.328  1  < 2.2e-16 ***
## sp_name                 234.027  5  < 2.2e-16 ***
## collection_temp          72.589  1  < 2.2e-16 ***
## pathogen                 35.282  3  1.062e-07 ***
## sp_name:collection_temp  43.558  5  2.847e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans::emmeans(reduced_factors_model, spec = "pathogen") %>% emmeans::contrast(method="trt.vs.ctrl",ref="no") %>% plot() + 
  geom_vline(xintercept = 0) + 
  labs(x = "Difference (°C)", 
       y = "Comparison") + 
    theme_matt()

Trait Correlations and Trade-offs

A relationship between size and upper thermal limits has been suggested in a wide range of other taxa. Shown below are the measured upper thermal limits plotted against prosome length. The overall relationship (inclusive of all species) is shown as the black line in the background. Regressions for each individual species are also shown. Across the entire assemblage, there is a strong decrease in thermal limits with increasing size.

full_data %>% 
  #filter(sex == "female") %>%  
  ggplot( aes(x = size, y = ctmax, colour = sp_name)) + 
  geom_point(size = 2, alpha = 0.3) + 
  geom_smooth(data = full_data, 
              aes(x = size, y = ctmax),
              method = "lm", 
              colour ="black", 
              linewidth = 2.5) + 
  labs(x = "Length (mm)", 
       y = "CTmax (°C)",
       colour = "Species") + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "right")

Shown here is the relationship for each species individually.

full_data %>% 
  #filter(sex == "female") %>%  
  group_by(sp_name) %>% filter(n() >2) %>% filter(!str_detect(sp_name, pattern = "kindti")) %>% 
  ggplot( aes(x = size, y = ctmax, colour = sp_name)) + 
  facet_wrap(sp_name~., scales = "free", nrow = 2) + 
  geom_point(size = 2, alpha = 0.8) + 
  geom_smooth(method = "lm", se = F, linewidth = 2) + 
  labs(x = "Length (mm)", 
       y = "CTmax (°C)",
       colour = "Species") + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "none")

Shown below is the relationship between mean size and mean thermal limits for females of each species. We see that larger species within the community tend to have a lower thermal limit than smaller species.

full_data %>% 
  group_by(sp_name, sex) %>% 
  summarize(mean_ctmax = mean(ctmax, na.rm = T),
            mean_size = mean(size, na.rm = T)) %>% 
  #filter(sex == "female") %>% 
  ggplot(aes(x = mean_size, y = mean_ctmax)) + 
  geom_smooth(method = "lm", se = F, linewidth = 2, colour = "black") + 
  geom_point(aes(colour = sp_name, shape = sex),
             size = 5) + 
  labs(x = "Length (mm)", 
       y = "CTmax (°C)",
       colour = "Species") + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "right")

Shown here is the relationship between fecundity and size, showing the classic pattern of increasing egg production with increasing size.

size_fecund_plot = ctmax_resids %>%  
  drop_na(fecundity) %>% 
  ggplot(aes(x = size, y = fecundity, colour = sp_name)) + 
  geom_smooth(method = "lm", se = F, linewidth = 2) + 
  geom_point(size = 2, alpha = 0.5) + 
  labs(x = "Prosome length (mm)", 
       y = "Fecundity (# Eggs)",
       colour = "Species") + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "right")

Individuals may also allocate energy to different fitness related traits, prioritizing reproductive output over environmental tolerance, for example. Shown below is the relationship between CTmax residuals (again, controlling for the effects of collection temperature) against fecundity. We can see clearly that individuals with increased fecundity are not decreasing thermal limits, suggesting that there is no energetic trade-off between these traits.

ctmax_fecund_plot = ctmax_resids %>%  
  drop_na(fecundity) %>% 
  ggplot(aes(x = resids, y = fecundity, colour = sp_name)) + 
  geom_smooth(method = "lm", se = F, linewidth = 2) + 
  geom_point(size = 2, alpha = 0.5) + 
  labs(x = "CTmax Residuals (°C)", 
       y = "Fecundity (# Eggs)",
       colour = "Species") + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "right")

ggarrange(size_fecund_plot, ctmax_fecund_plot, ncol = 1, common.legend = T, labels = "AUTO", legend = "right")

Previous studies have shown that the magnitude of the size-fecundity correlation may be environmentally-dependent. This is not visible is the data from these collections.

corr_data = full_data %>% 
  drop_na(fecundity) %>% 
  filter(sp_name %in% c("Leptodiaptomus sicilis",
                        "Leptodiaptomus minutus", 
                        "Skistodiaptomus sp")) %>%  
  group_by(collection_date, collection_temp, sp_name) %>% 
  summarise(size_fec_corr = cor(size, fecundity),
            n = n(),
            mean_fecundity = mean(fecundity)) %>% 
  filter(n >= 3) %>% 
  ungroup() %>%  
  group_by(sp_name) %>% 
  mutate(temp_cent = scale(collection_temp, scale = F))

ggplot(corr_data, aes(x = temp_cent, y = size_fec_corr, colour = sp_name)) + 
  facet_wrap(sp_name~., nrow = 3) + 
  geom_hline(yintercept = 0) +
  geom_point(size = 3) + 
  geom_smooth(method = "lm", linewidth = 2) + 
  scale_color_manual(values = species_cols) + 
  labs(x = "Temperature (centered)", 
       y = "Correlation Coefficient") + 
  coord_cartesian(ylim = c(-1, 1)) +
  theme_matt_facets() + 
  theme(legend.position = "none")


# ggplot(corr_data, aes(x = size_fec_corr)) +
#     facet_wrap(sp_name~., nrow = 3) +
#   geom_histogram(binwidth = 0.2)

Other patterns in variation

Leptodiaptomus sicilis is the most abundant species during the winter. There was a large shift in the size of mature females towards the end of December. These large and small individuals are the same species (confirmed via COI sequencing), suggesting this shift may instead reflect a transition from one generation to another. This size difference may be caused by differences in the developmental environments. For example, individuals developing in January grow up at very low temperatures, and therefore may reach larger sizes. These individuals over-summer in deep waters, then re-emerge in October and produce a new generation. Water temperatures are still fairly high through November, which results in a generation of smaller individuals. These individuals mature in time to produce a new generation in January.

full_data %>%  
  filter(sp_name == "Leptodiaptomus sicilis") %>% 
  filter(sex != "juvenile") %>% 
  group_by(collection_date) %>% 
  mutate(size_center = scale(size, center = T, scale = F)) %>% 
  ggplot(aes(y = factor(collection_date), x = size, fill = collection_temp)) + 
  facet_wrap(sex~.) + 
  geom_density_ridges(bandwidth = 0.04) + 
  geom_vline(xintercept = 0.89) + 
  labs(x = "Size (mm)",
       y = "Date", 
       fill = "Coll. Temp. (°C)") + 
  theme_matt() + 
  theme(legend.position = "right",
        axis.text.y = element_text(size = 12))

A similar, but less distinct pattern can be observed in L. minutus individuals as well.

full_data %>%  
  filter(sp_name == "Leptodiaptomus minutus") %>% 
  filter(sex != "juvenile") %>% 
  ggplot(aes(y = factor(collection_date), x = size, fill = collection_temp)) + 
  facet_wrap(sex~.) + 
  geom_density_ridges(bandwidth = 0.04) + 
  geom_vline(xintercept = 0.69) + 
  labs(x = "Size (mm)",
       y = "Date", 
       fill = "Coll. Temp. (°C)") + 
  coord_cartesian(xlim = c(0.5,0.9)) + 
  theme_matt() + 
  theme(legend.position = "right",
        axis.text.y = element_text(size = 12))

Distribution Lag Non-Linear Model (DLNM approach)

Distributed lag models examine a response variable, measured at multiple time points, as a function of the lagged occurrence of some predictor variable (response y at time t as a function of predictor x(t-lag). This method utilizes a bi-dimensional dose-lag-response function, which essentially examines not only the dose effect, but the effect of the timing of the dose.

# Run this code, save the product, and then just read in the temp lag data object. Takes too long to run each time this document is knit. 

# lag_temps = temp_data %>%
#   group_by(date, hour) %>%
#   summarize("mean_temp" = mean(temp, na.rm = T)) %>%
#   ungroup() %>%
#   mutate(point_num = row_number())
# 
# uniq_days = length(unique(lag_temps$date))
# 
# g = gam(mean_temp ~ s(point_num, bs="cr", k=uniq_days + 10),
#     method = "REML",
#     data = lag_temps)
# 
# points = seq(1, nrow(lag_temps), length.out = length(lag_temps$hour))
# 
# df.res = df.residual(g)
# 
# pred_temps = predict(g, newdata = lag_temps, type = "response", se.fit = TRUE)
# 
# lag_temps = lag_temps %>%
#   mutate(trend_T = pred_temps$fit,
#          trend_se = pred_temps$se.fit,
#          temp_diff = mean_temp - trend_T)
# 
# write.csv(lag_temps, file = "./Output/Data/lag_temps.csv", row.names = F)

dlnm_data = full_data %>%  
  filter(sex == "female") %>% 
  filter(sp_name %in% c(
    "Leptodiaptomus sicilis",
    "Leptodiaptomus minutus"
  )) %>% 
  select(collection_date, collection_temp, sp_name, ctmax) %>% 
  group_by(collection_date, collection_temp, sp_name) %>%  
  summarise(mean_ctmax = mean(ctmax, na.rm = T),
            sample = n())

temp_data %>% 
  group_by(date) %>% 
  summarise(mean_temp = mean(temp),
            max_temp = max(temp, na.rm = T)) %>% 
  right_join(dlnm_data, by = join_by("date" == "collection_date")) %>% 
  ggplot(aes(x = max_temp, y = mean_ctmax)) + 
  facet_wrap(.~sp_name) + 
  geom_smooth(method = "gam") + 
  geom_point() + 
  labs(x = "Max Daily Temp. (°C)",
       y = "Mean CTmax (°C)") + 
  theme_matt_facets() + 
  theme(strip.text.x = element_text(size = 12))


sp_list = unique(dlnm_data$sp_name)

for(lag_species in sp_list){
  
  dlnm_data_sp = dlnm_data %>% 
    filter(sp_name == lag_species)
  
  # We need to estimate a matrix of exposure histories for each observation. This contains the series of exposures at each lag (l) for each of the n observations, constrained between l0 (minimum lag) and L (max lag). 
  
  dates = dlnm_data_sp$collection_date # For each of these dates, make a vector of the past 30 days (including the day of collection). NOTE: Don't use 'unique' dates here since some collections had multiple species
  
  exp_hist_z = data.frame()
  exp_hist_trend = data.frame()
  
  for(d in dates){
    
    history = lag_temps %>% 
      filter(date <= d & date > d - 10) %>% 
      arrange(desc(date), desc(hour)) %>% 
      mutate(lag = row_number() - 1) %>% 
      select(lag, mean_temp, temp_diff)
    
    z_vec = scale(history$mean_temp)[,1]
    names(z_vec) = history$lag
    
    trend_vec = history$temp_diff
    names(trend_vec) = history$lag
    
    exp_hist_z = bind_rows(exp_hist_z, z_vec)
    exp_hist_trend = bind_rows(exp_hist_trend, trend_vec)
    
  }
  
  #print(max(exp_hist_trend, na.rm = T))
  
  # The cross-basis function from dlnm will use the class of the x parameter to determine what to do. In our case, we need to provide it with the matrix of exposure histories for reach observation (row) and lag (column). 
  
  cb_temps = crossbasis(exp_hist_trend, lag = c(0,dim(exp_hist_trend)[2]-1), 
                        argvar =list(fun="cr",df=3), 
                        arglag=list(fun="cr",df=3,intercept=T))
  
  #summary(cb_temps)
  
  penalized_mat <- cbPen(cb_temps)
  
  #fitting GAM
  lag.gam = gam(data = dlnm_data_sp, 
                mean_ctmax ~ collection_temp + cb_temps, 
                method = "GCV.Cp",
                paraPen=list(cb_temps=penalized_mat))
  
  # summary(lag.gam)
  # AIC(lag.gam)
  
  #estimation of exposures effects
  
  #default plots
  pred_gam_Zs<-crosspred(cb_temps, lag.gam, 
                         cumul=F, cen=0, ci.level = 0.95,
                         at=seq(-4,4, 0.1))
  
  plot(pred_gam_Zs, "contour", main = lag_species, 
              xlab = "Temperature Deviation (°C)", 
              ylab = "Hours before collection")
  
  
  # 
  # plot(pred_gam_Zs, border = 2, cumul=F,
  #       theta=110,phi=20,ltheta=-80)
  
  # plot(pred_gam_Zs, "slices",
  #      var = c(3,-3),
  #      lag = c(1,200),
  #      col = 2)
  # 
}

Miscellany

run_starts = temp_record %>% 
  group_by(run) %>% 
  filter(minute_passed <= 3) %>% 
  summarise(start_temp = mean(temp_C)) %>% 
  mutate("temp_bin" = cut_number(start_temp, 4),
         temp_bin = case_when(
           temp_bin == "[2.7,9.26]" ~ "[2.7°C - 9.26°C]",
           temp_bin == "(9.26,13.9]" ~ "[9.26°C - 13.9°C]",
           temp_bin == "(13.9,21]" ~ "[13.9°C - 21°C]",
           temp_bin == "(21,30.5]" ~ "[21°C - 30.5°C]"
         ))

ramp_record2 = ramp_record %>% 
  group_by(run, minute_interval) %>% 
  summarise(mean_ramp = mean(ramp_per_minute)) %>% 
  ungroup() %>% 
  left_join(run_starts) %>% 
  mutate(temp_bin = fct_reorder(temp_bin, start_temp, .fun = mean))

ggplot(ramp_record2, aes(x = minute_interval, y = mean_ramp)) + 
  facet_wrap(temp_bin~.) + 
  geom_hline(yintercept = 0.3, colour = "grey") + 
  geom_hline(yintercept = 0.1, colour = "grey") + 
  geom_hex(aes(fill = cut(..count.., c(2, 5, 10, 20, 30, 40, 50))),
           bins = 30) + 
  scale_fill_viridis_d(name="Number of Observations",
                       labels = c("<5", "5-9", "10-19", "20-29", "30-39", "40-50"),
                       option = "mako") + 
  labs(y = "Ramp Rate (deg. C / min.)",
       x = "Time into run (minute)") + 
  theme_matt_facets(base_size = 12)


if(predict_vuln == F){

  knitr::knit_exit()

}
---
title: Seasonality in Lake Champlain Copepod Thermal Limits
date: "`r Sys.Date()`"
output: 
  html_document:
          code_folding: hide
          code_download: true
          toc: true
          toc_float: true
  github_document:
          html_preview: false
          toc: true
          toc_depth: 3
---

```{r setup, include=T, message = F, warning = F, echo = F}
knitr::opts_chunk$set(
  echo = knitr::is_html_output(),
  fig.align = "center",
  fig.path = "../Figures/markdown/",
  dev = c("png", "pdf"),
  message = FALSE,
  warning = FALSE,
  collapse = T
)

theme_matt = function(base_size = 18,
                      dark_text = "grey20"){
  mid_text <-  monochromeR::generate_palette(dark_text, "go_lighter", n_colours = 5)[2]
  light_text <-  monochromeR::generate_palette(dark_text, "go_lighter", n_colours = 5)[3]
  
  ggpubr::theme_pubr(base_family="sans") %+replace% 
    theme(
      panel.background  = element_rect(fill="transparent", colour=NA), 
      plot.background = element_rect(fill="transparent", colour=NA), 
      legend.background = element_rect(fill="transparent", colour=NA),
      legend.key = element_blank(),
      text = element_text(colour = mid_text, lineheight = 1.1),
      title = element_text(size = base_size * 1.5,
                           colour = dark_text),
      axis.text = element_text(size = base_size,
                               colour = mid_text),
      axis.title.x = element_text(size = base_size * 1.2,
                                  margin = unit(c(3, 0, 0, 0), "mm")),
      axis.title.y = element_text(size = base_size * 1.2,
                                  margin = unit(c(0, 5, 0, 0), "mm"), 
                                  angle = 90),
      legend.text = element_text(size=base_size * 0.9),
      legend.title = element_text(size = base_size * 0.9, 
                                  face = "bold"),
      plot.margin = margin(0.25, 0.25, 0.25, 0.25,"cm")
    )
}

theme_matt_facets = function(base_size = 18,
                             dark_text = "grey20"){
  mid_text <-  monochromeR::generate_palette(dark_text, "go_lighter", n_colours = 5)[2]
  light_text <-  monochromeR::generate_palette(dark_text, "go_lighter", n_colours = 5)[3]
  
  theme_bw(base_family="sans") %+replace% 
    theme(
      panel.grid = element_blank(),
      panel.background  = element_rect(fill="transparent", colour=NA), 
      plot.background = element_rect(fill="transparent", colour=NA), 
      legend.background = element_rect(fill="transparent", colour=NA),
      legend.key = element_rect(fill="transparent", colour=NA),
      text = element_text(colour = mid_text, lineheight = 1.1),
      strip.text.x = element_text(size = base_size),
      title = element_text(size = base_size * 1.5,
                           colour = dark_text),
      axis.text = element_text(size = base_size,
                               colour = mid_text),
      axis.title.x = element_text(size = base_size * 1.2,
                                  margin = unit(c(3, 0, 0, 0), "mm")),
      axis.title.y = element_text(size = base_size * 1.2,
                                  margin = unit(c(0, 5, 0, 0), "mm"), 
                                  angle = 90),
      legend.text = element_text(size=base_size * 0.9),
      legend.title = element_text(size = base_size * 0.9, 
                                  face = "bold"),
      plot.margin = margin(0.25, 0.25, 0.25, 0.25,"cm")
    )
}

species_cols = c("Leptodiaptomus minutus" = "#ffd029",
                 "Leptodiaptomus minutus juvenile" = "#e3d8af",
                 "Leptodiaptomus minutus male" = "#ffe896",
                 "Leptodiaptomus sicilis" = "#f08f46",
                 "Leptodiaptomus sicilis male" = "#E28C00",
                 "Skistodiaptomus sp" = "#C5C35A",
                 "Skistodiaptomus sp male" = "#d9d893", 
                 "Skistodiaptomus sp juvenile" = "#e6e6aa", 
                 "Epischura lacustris juvenile" = "plum1", 
                 "Epischura lacustris male" = "plum3", 
                 "Epischura lacustris" = "plum4", 
                 "Limnocalanus macrurus" = "skyblue4", 
                 "Limnocalanus macrurus male" = "skyblue3", 
                 "Limnocalanus macrurus juvenile" = "skyblue", 
                 "Senecella calanoides" = "darkseagreen3",
                 "Leptodora kindti male" = "lightblue3",
                 "Leptodora kindti" = "lightblue4",
                 "Leptodora kindti juvenile" = "lightblue",
                 "Osphranticum labronectum" = "firebrick3")
```

## Copepod Collection

Copepods were collected at approximately weekly intervals from Lake Champlain (Burlington Fishing Pier). Plankton was collected from the top 3 meters using a 250 um mesh net. 

```{r}
# # Lake Champlain near Burlington, VT
# siteNumber = "04294500"
# ChamplainInfo = readNWISsite(siteNumber)
# parameterCd = "00010"
# startDate = "2023-01-01"
# endDate = "2024-5-20"
# #statCd = c("00001", "00002","00003", "00011") # 1 - max, 2 - min, 3 = mean
# 
# # Constructs the URL for the data wanted then downloads the data
# url = constructNWISURL(siteNumbers = siteNumber, parameterCd = parameterCd,
#                        startDate = startDate, endDate = endDate, service = "uv")
# 
# raw_temps = importWaterML1(url, asDateTime = T) %>%
#   mutate("date" = as.Date(dateTime),
#          "hour" = hour(dateTime)) %>%
#   select(dateTime, tz_cd, date, hour, degC = X_00010_00000)
# 
# temp_data =  raw_temps %>%
#   select(date, hour, "temp" = degC)
# 
# write.csv(temp_data, file = "./Output/Data/champlain_temps.csv", row.names = F)
```

Collections began in late May 2023. Several gaps are present, but collections have continued at roughly weekly intervals since then. Copepods from `r length(unique(full_data$collection_date))` collections were used to make a total of `r dim(full_data)[1]` thermal limit measurements. Over this time period, collection temperatures ranged from `r paste(min(full_data$collection_temp), " to ", max(full_data$collection_temp), sep = "")`°C.     

There is substantial variation in thermal limits across the species collected. There is also some degree of variation within the species, with thermal limits increasing slightly during the summer.    

```{r main-fig-ctmax-timeseries, fig.width=9, fig.height=5}
## Daily values for the period examined by dataset
collection_conditions = temp_data %>%
  ungroup() %>% 
  group_by(date) %>% 
  summarise(mean_temp = mean(temp),
            med_temp = median(temp),
            var_temp = var(temp), 
            min_temp = min(temp), 
            max_temp = max(temp)) %>% 
  mutate("range_temp" = max_temp - min_temp,
         date = as.Date(date)) %>% 
  ungroup() %>%  
  filter(date >= (min(as.Date(full_data$collection_date)) - 7)) %>% 
  left_join(unique(select(full_data, collection_date, collection_temp)), by = join_by(date == collection_date))

## Mean female thermal limits for each species, grouped by collection
species_summaries = full_data %>%  
  #filter(sex == "female") %>% 
  group_by(sp_name, collection_date, collection_temp) %>%  
  summarise("mean_ctmax" = mean(ctmax),
            "sample_size" = n(),
            "ctmax_st_err" = (sd(ctmax) / sqrt(sample_size)),
            "ctmax_var" = var(ctmax), 
            "mean_size" = mean(size),
            "size_st_err" = (sd(size) / sqrt(sample_size)),
            "size_var" = var(size)) %>%  
  ungroup() %>% 
  complete(sp_name, collection_date) %>% 
  arrange(desc(sample_size))

adult_summaries = full_data %>%  
  filter(sex == "female") %>% 
  group_by(sp_name, collection_date, collection_temp) %>%  
  summarise("mean_ctmax" = mean(ctmax),
            "sample_size" = n(),
            "ctmax_st_err" = (sd(ctmax) / sqrt(sample_size)),
            "ctmax_var" = var(ctmax), 
            "mean_size" = mean(size),
            "size_st_err" = (sd(size) / sqrt(sample_size)),
            "size_var" = var(size)) %>%  
  ungroup() %>% 
  complete(sp_name, collection_date) %>% 
  arrange(desc(sample_size))


tseries_data = full_data %>% 
  mutate(sp_name = fct_reorder(sp_name, ctmax, .desc = T))

ggplot() + 
  geom_line(data = collection_conditions, 
            aes(x = as.Date(date), y = mean_temp),
            colour = "black", 
            linewidth = 1) + 
  geom_point(data = filter(tseries_data, species != "osphranticum_labronectum"), 
             aes(x = as.Date(collection_date), y = ctmax, colour = sp_name),
             size = 1.5, shape = 16, alpha = 0.9,
             position = position_jitter(width = 0, height = 0)) + 
    geom_point(data = filter(tseries_data, species == "osphranticum_labronectum"), 
             aes(x = as.Date(collection_date), y = ctmax, colour = sp_name),
             size = 2, shape = 16, alpha = 0.9,
             position = position_jitter(width = 0, height = 0)) + 
  scale_colour_manual(values = species_cols) + 
  guides(colour = guide_legend(override.aes = list(alpha = 1, size = 5))) + 
  labs(x = "Date", 
       y = "Temperature (°C)", 
       colour = "Species",
       size = "Sample Size") + 
  theme_matt() + 
  theme(legend.position = "right", 
        axis.text.x = element_text(angle = 320, hjust = 0, vjust = 0.5))

lake_temps = ggplot() + 
  geom_line(data = collection_conditions, 
            aes(x = as.Date(date), y = mean_temp),
            colour = "black", 
            linewidth = 1) + 
  labs(x = "Date", 
       y = "Temperature (°C)", 
       colour = "Species",
       size = "Sample Size") + 
  theme_matt() + 
  theme(legend.position = "right")
```

Temperatures observed at the time of collection closely resembled the maximum daily temperature from the temperature sensor data. Maximum temperature was used as a proxy instead of mean temperature as collections were usually made during afternoons or early evenings, just following the warmest part of the day. 

```{r supp-fig-temp-acc, fig.width = 6, fig.height=6}
collection_conditions %>% 
  drop_na(collection_temp) %>%  
  ggplot(aes(x = max_temp, y = collection_temp)) + 
  geom_abline(intercept = 0, slope = 1,
              linewidth = 1, colour = "grey") + 
  geom_point(size = 3) +
  scale_x_continuous(breaks = c(5,15,25)) + 
  scale_y_continuous(breaks = c(5,15,25)) + 
  labs(x = "Max. Temp. from Sensor (°C)",
       y = "Collection Temp. (°C)") + 
  theme_matt()
```

```{r misc-round-summary-2, fig.width=11, fig.height=11, include = F}
ggplot() + 
  geom_hline(yintercept = 
               c(max(full_data$collection_temp),
                 min(full_data$collection_temp)), 
             colour = "grey60",
             linewidth = c(2,1), 
             alpha = 0.5) + 
  geom_bar(data = unique(select(full_data, collection_date, collection_temp)), 
           aes(x = as_date(collection_date), y = collection_temp),
           stat = "identity",
           fill = "grey30") + 
  geom_point(data = full_data, 
             aes(x = as_date(collection_date), y = ctmax),
             position = position_jitter(width = 0.7, height = 0),
             colour = "grey30",
             alpha = 0.5) + 
  ylim(-3, 40) + 
  coord_polar(start = 0) + 
  theme_void()
```


Size also varied, but primarily between rather than within species. 

```{r supp-fig-size-timeseries, fig.width=10, fig.height=5}
ggplot() + 
  geom_vline(data = unique(select(full_data, collection_date)), 
             aes(xintercept = as.Date(collection_date)),
             colour = "grey90",
             linewidth = 1) + 
  geom_line(data = collection_conditions, 
            aes(x = as.Date(date), y = mean_temp),
            colour = "black", 
            linewidth = 2) + 
  # geom_errorbar(data = species_summaries,
  #               aes(x = as.Date(collection_date), 
  #                   ymin = mean_ctmax - ctmax_st_err, ymax = mean_ctmax + ctmax_st_err,
  #                   colour = sp_name),
  #               position = position_dodge(width = 1),
  #               width = 5, linewidth = 1) + 
  geom_point(data = adult_summaries, 
             aes(x = as.Date(collection_date), y = mean_size * 40, colour = sp_name, size = sample_size),
             position = position_dodge(width = 1)) + 
  scale_colour_manual(values = species_cols) + 
  scale_y_continuous(
    name = "Temperature", # Features of the first axis
    sec.axis = sec_axis(~./40, name="Prosome Length (mm)"), # Add a second axis and specify its features
    breaks = c(0,5,10,15,20,25,30)
  ) + 
  labs(x = "Date", 
       y = "Temperature (°C)", 
       colour = "Species") + 
  theme_matt() + 
  theme(legend.position = "right")
```

```{r misc-trait-doy-feature, fig.width = 14, fig.height = 7, include = F}
#Shown below is CTmax and body size for the species with the most data (*Skistodiaptomus*, *L. minutus*, *L. sicilis*, and *Epischura*), plotted against the day of the year for each sex/stage separately. 

ctmax_feature = full_data %>%  
  mutate(doy = yday(collection_date)) %>% 
  filter(sp_name %in% c("Skistodiaptomus oregonensis", "Leptodiaptomus minutus", "Leptodiaptomus sicilis", "Epischura lacustris")) %>% 
  ggplot(aes(x = as.Date(collection_date), y = ctmax, colour = sp_name)) + 
  facet_grid(sp_name~sex) + 
  geom_point() + 
  scale_colour_manual(values = species_cols) + 
  labs(x = "Day of the Year", 
       y = "CTmax (°C)") + 
  theme_matt_facets() +
  theme(axis.text.x = element_text(angle = 300, hjust = 0, vjust = 0.5),
        legend.position = "none")

size_feature = full_data %>%  
  mutate(doy = yday(collection_date)) %>% 
  filter(sp_name %in% c("Skistodiaptomus oregonensis", "Leptodiaptomus minutus", "Leptodiaptomus sicilis", "Epischura lacustris")) %>% 
  ggplot(aes(x = as.Date(collection_date), y = size, colour = sp_name)) + 
  facet_grid(sp_name~sex) + 
  geom_point() + 
  scale_colour_manual(values = species_cols) + 
  labs(x = "Day of the Year", 
       y = "Size (mm)") + 
  theme_matt_facets() +
  theme(axis.text.x = element_text(angle = 300, hjust = 0, vjust = 0.5),
        legend.position = "none")

ggarrange(ctmax_feature, size_feature, common.legend = T, labels = "AUTO", legend = "none")
```

```{r sp-occurences, fig.width=7, fig.height=5}
sample_dates_plot = full_data %>%  
  filter(sp_name != "Osphranticum labronectum") %>% 
  mutate(sp_name = as.factor(sp_name),
         sp_name = fct_reorder(sp_name, ctmax)) %>% 
  ggplot(aes(x = lubridate::as_date(collection_date), 
             y = sp_name, fill = sp_name)) + 
  # geom_vline(xintercept = as_date(
  #   c("2023-05-01",
  #     "2023-09-01",
  #     "2024-01-01",
  #     "2024-05-01")),
  #   colour = "grey",
  #   linewidth = 1) + 
  geom_density_ridges(bandwidth = 30,
                      jittered_points = TRUE, 
                      point_shape = 21,
                      point_size = 1,
                      point_colour = "grey30",
                      point_alpha = 0.8,
                      alpha = 0.8,
                      position = position_points_jitter(
                        height = 0.1, width = 0)) + 
  scale_fill_manual(values = species_cols) + 
  scale_x_date(date_breaks = "3 months",
               date_labels = "%b") + 
  coord_cartesian(xlim = lubridate::as_date(c("2023-06-08", "2024-05-08"))) + 
  labs(x = "Day of Year", 
       y = "Species") + 
  theme_matt() + 
  #theme_ridges(grid = T) + 
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 270, hjust = 0, vjust = 0.5))
```

The samples captured the broad seasonal changes in calanoid copepod community composition in the lake. We note, however, that rare species (e.g. *Senecella* and *Limnocalanus*) were often preferentially sampled, so are over-represented in the data set. 

```{r supp-fig-sp-props, fig.width = 12, fig.height = 5, include = F}
adult_summaries %>% 
  ungroup() %>% 
  mutate(collection_num = as.numeric(factor(collection_date))) %>% 
  group_by(collection_date) %>%  
  arrange(collection_date) %>% 
  select(sp_name, collection_date, collection_num, sample_size) %>% 
  mutate(sample_size = replace_na(sample_size, 0)) %>% 
  mutate(total = sum(sample_size),
         percentage = sample_size / total,
         collection_date = lubridate::as_date(collection_date)) %>% 
  ggplot(aes(x = collection_date, y = percentage, fill = sp_name)) + 
  geom_area() + 
  scale_fill_manual(values = species_cols) + 
  scale_y_continuous(breaks = c(0,1)) + 
  labs(x = "Collection Date", 
       y = "Proportion", 
       fill = "Species") + 
  theme_minimal(base_size = 20) + 
  theme(panel.grid = element_blank(),
        axis.ticks = element_line())
```

Throughout the season, the prevalence of various unidentified pathogens also varied, with very little infection observed during the Winter and Spring. 

```{r supp-fig-pathogen-props, fig.width=8, fig.height=5}
pathogen_cols = c("no" = "grey95", "cloudy" = "honeydew3", "spot" = "antiquewhite3", "other" = "tomato3")

full_data %>% 
  select(collection_date, dev_eggs, pathogen, lipids, sp_name, sex) %>% 
  group_by() %>% 
  filter(sex != "juvenile") %>% 
  group_by(collection_date) %>% 
  count(pathogen) %>% 
  filter(pathogen != "uncertain") %>% 
  pivot_wider(id_cols = "collection_date", 
              names_from = pathogen, 
              values_from = n,
              values_fill = 0) %>% 
  mutate(total = sum(no, cloudy, spot, other)) %>% 
  pivot_longer(cols = c(no, cloudy, spot, other),
               names_to = "pathogen", 
               values_to = "count") %>% 
  mutate(percent = count/total,
         collection_date = lubridate::as_date(collection_date),
         pathogen = fct_relevel(pathogen, "no", "cloudy", "spot", "other")) %>% 
  ggplot(aes(x = collection_date, y = percent, fill = pathogen)) + 
  geom_area() + 
  scale_fill_manual(values = pathogen_cols) + 
  scale_y_continuous(breaks = c(0,1)) + 
  labs(x = "Collection Date", 
       y = "Proportion", 
       fill = "Pathogen") + 
  theme_minimal(base_size = 20) + 
  theme(panel.grid = element_blank(),
        axis.ticks = element_line())
```

The transparent bodies of these copepods also allowed us to examine seasonal patterns in lipid reserves and in the production of eggs. Maturing oocytes are visible in female copepods before they are released. There was no strong seasonal cycle in the production of these eggs in any species, and instead, females were reproductively active throughout their respective seasons of occurence. 

```{r supp-fig-deveggs-props, fig.height = 15, fig.width = 8}
dev_eggs_cols = c("no" = "grey95", "yes" = "lightblue3")

full_data %>% 
  select(collection_date, dev_eggs, pathogen, lipids, sp_name, sex) %>% 
  group_by(sp_name) %>% 
  filter(sex != "juvenile") %>% 
  group_by(sp_name, collection_date) %>% 
  count(dev_eggs) %>% 
  filter(dev_eggs != "uncertain") %>% 
  pivot_wider(id_cols = c("collection_date", "sp_name"), 
              names_from = dev_eggs, 
              values_from = n,
              values_fill = 0) %>% 
  mutate(total = sum(no, yes)) %>% 
  pivot_longer(cols = c(no, yes),
               names_to = "dev_eggs", 
               values_to = "count") %>% 
  mutate(percent = count/total,
         collection_date = lubridate::as_date(collection_date),
         dev_eggs = fct_relevel(dev_eggs, "no", "yes")) %>% 
  ungroup() %>% 
  complete(collection_date, nesting(sp_name, dev_eggs), fill = list(percent = 1)) %>% 
  mutate(percent = if_else(is.na(total) & dev_eggs == "yes", 0, percent)) %>% 
  ggplot(aes(x = collection_date, y = percent, fill = dev_eggs)) + 
  facet_wrap(sp_name~., ncol = 1) + 
  geom_area() + 
  scale_fill_manual(values = dev_eggs_cols) + 
  scale_y_continuous(breaks = c(0,1)) + 
  labs(x = "Collection Date", 
       y = "Proportion", 
       fill = "Developing \nEggs") + 
  theme_minimal(base_size = 20) + 
  theme(panel.grid = element_blank(),
        axis.ticks = element_line())
```

The presence of lipids varied across species, with only *L. minutus*, *L. sicilis*, and *Limnocalanus* regularly possessing lipid stores. 

```{r supp-fig-lipids-props, fig.height = 15, fig.width = 8}
lipid_cols = c("no" = "grey95", "yes" = "sienna2")

full_data %>% 
  select(collection_date, dev_eggs, pathogen, lipids, sp_name, sex) %>% 
  group_by(sp_name) %>% 
  filter(sex != "juvenile") %>% 
  group_by(sp_name, collection_date) %>% 
  count(lipids) %>% 
  filter(lipids != "uncertain") %>% 
  pivot_wider(id_cols = c("collection_date", "sp_name"), 
              names_from = lipids, 
              values_from = n,
              values_fill = 0) %>% 
  mutate(total = sum(no, yes)) %>% 
  pivot_longer(cols = c(no, yes),
               names_to = "lipids", 
               values_to = "count") %>% 
  mutate(percent = count/total,
         collection_date = lubridate::as_date(collection_date),
         lipids = fct_relevel(lipids, "no", "yes")) %>% 
  ungroup() %>% 
  complete(collection_date, nesting(sp_name, lipids), fill = list(percent = 1)) %>% 
  mutate(percent = if_else(is.na(total) & lipids == "yes", 0, percent)) %>% 
  ggplot(aes(x = collection_date, y = percent, fill = lipids)) + 
  facet_wrap(sp_name~., ncol = 1) + 
  geom_area() + 
  scale_fill_manual(values = lipid_cols) + 
  scale_y_continuous(breaks = c(0,1)) + 
  labs(x = "Collection Date", 
       y = "Proportion", 
       fill = "Lipids\nPresent") + 
  theme_minimal(base_size = 20) + 
  theme(panel.grid = element_blank(),
        axis.ticks = element_line())
```


## Temperature Variability
Lake Champlain is highly seasonal, with both average temperatures and temperature variability changing throughout the year. These patterns in the experienced thermal environment may drive the observed variation in copepod thermal limits. However, the time period affecting copepod thermal limits is unknown. Depending the on the duration of time considered, there are large changes in the experienced environment, in particular regarding the temperature range and variance. Consider for example three time periods: the day of collection, one week prior to collection, and four weeks prior to collection. While the overall pattern is similar, we can see that, unsurprisingly, considering longer periods of time results in larger ranges and slightly changes the pattern of variance experienced. 

```{r daily-temp-data, include = F}
## Daily values
daily_temp_data = temp_data %>%
  ungroup() %>% 
  group_by(date) %>% 
  summarise(mean_temp = mean(temp),
            med_temp = median(temp),
            var_temp = var(temp), 
            min_temp = min(temp), 
            max_temp = max(temp)) %>% 
  mutate("range_temp" = max_temp - min_temp)

day_prior_temp_data = temp_data %>% 
  ungroup() %>% 
  group_by(date) %>% 
  summarise(mean_temp = mean(temp),
            med_temp = median(temp),
            var_temp = var(temp), 
            min_temp = min(temp), 
            max_temp = max(temp)) %>% 
  mutate(date = date + 1) %>% 
  rename_with(.fn = ~ paste0("prior_day_", .x), .cols = c(-date))

daily_plot = daily_temp_data %>% 
  pivot_longer(cols = c(-date),
               names_to = "parameter", 
               values_to = "temp") %>% 
  ggplot(aes(x = date, y = temp, colour = parameter)) + 
  geom_line(linewidth = 1) + 
  scale_colour_manual(values = c(
    "mean_temp" = "olivedrab3",
    "med_temp" = "seagreen3",
    "max_temp" = "tomato",  
    "min_temp" = "dodgerblue",
    "range_temp" = "goldenrod3",
    "var_temp" = "darkgoldenrod1"
  )) + 
  scale_x_continuous(breaks = as.Date(c("2023-01-01", "2023-04-01", "2023-07-01"))) + 
  ggtitle("Daily Values") + 
  labs(y = "Temperature (°C)",
       x = "") + 
  theme_bw(base_size = 20) + 
  theme(panel.grid = element_blank(),
        axis.text.x = element_text(angle = 270, hjust = 0, vjust = 0.5))
```

```{r predictors-function}
## Defining the function to get predictor values for periods of different lengths
get_predictors = function(daily_values, raw_temp, n_days){
  prefix = str_replace_all(xfun::numbers_to_words(n_days), pattern = " ", replacement = "-")
  
  mean_values = daily_values %>% 
    ungroup() %>% 
    mutate(mean_max = slide_vec(.x = max_temp, .f = mean, .before = n_days, .complete = T),
           mean_min = slide_vec(.x = min_temp, .f = mean, .before = n_days, .complete = T),
           mean_range = slide_vec(.x = range_temp, .f = mean, .before = n_days, .complete = T)) %>% 
    select(date, mean_max, mean_min, mean_range) %>% 
    rename_with( ~ paste(prefix, "day", .x, sep = "_"), .cols = c(-date))
  
  period_values = raw_temp %>% 
    mutate(mean = slide_index_mean(temp, i = date, before = days(n_days), 
                                   na_rm = T),
           max = slide_index_max(temp, i = date, before = days(n_days), 
                                 na_rm = T),
           min = slide_index_min(temp, i = date, before = days(n_days),
                                 na_rm = T),
           med = slide_index_dbl(temp, .i = date, .before = days(n_days), 
                                 na_rm = T, .f = median),
           var = slide_index_dbl(temp, .i = date, .before = days(n_days), 
                                 .f = var),
           range = max - min) %>%  
    select(-temp) %>%  
    distinct() %>% 
    rename_with( ~ paste(prefix, "day", .x, sep = "_"), .cols = c(-date))%>% 
    inner_join(mean_values, by = c("date")) %>%  
    drop_na()
  
  return(period_values)
}
```

```{r misc-predictors-and-plots, fig.width=12, fig.height=5, include = F}
# ## Getting predictor variables for different periods
# 
# ### Short (three days)
# three_day_temps = get_predictors(daily_values = daily_temp_data, 
#                                  raw_temp = temp_data, 
#                                  n_days = 3)
# 
# ### ONE WEEK
week_temps = get_predictors(daily_values = daily_temp_data,
                            raw_temp = temp_data,
                            n_days = 7)

week_plot = week_temps %>%
  pivot_longer(cols = c(-date),
               names_to = "parameter",
               values_to = "temp") %>%
  filter(parameter %in% c("seven_day_mean",
                          "seven_day_med",
                          "seven_day_max",
                          "seven_day_min",
                          "seven_day_var",
                          "seven_day_range")) %>%
  mutate(parameter = paste(word(parameter, start = 3, sep = fixed("_")), "_temp", sep = "")) %>%
  ggplot(aes(x = date, y = temp, colour = parameter)) +
  geom_line(linewidth = 1) +
  scale_colour_manual(values = c(
    "mean_temp" = "olivedrab3",
    "med_temp" = "seagreen3",
    "max_temp" = "tomato",
    "min_temp" = "dodgerblue",
    "range_temp" = "goldenrod3",
    "var_temp" = "darkgoldenrod1"
  )) +
  scale_x_continuous(breaks = as.Date(c("2023-01-01", "2023-04-01", "2023-07-01"))) +
  ggtitle("One Week") +
  labs(y = "Temperature (°C)",
       x = "") +
  theme_bw(base_size = 20) +
  theme(panel.grid = element_blank(),
        axis.text.x = element_text(angle = 270, hjust = 0, vjust = 0.5))
# 
# 
# ### TWO WEEKS
# two_week_temps = get_predictors(daily_values = daily_temp_data, 
#                                 raw_temp = temp_data, 
#                                 n_days = 14)
# 
# two_week_plot = two_week_temps %>% 
#   pivot_longer(cols = c(-date),
#                names_to = "parameter", 
#                values_to = "temp") %>% 
#   filter(parameter %in% c("fourteen_day_mean",
#                           "fourteen_day_med",
#                           "fourteen_day_max", 
#                           "fourteen_day_min", 
#                           "fourteen_day_var",
#                           "fourteen_day_range")) %>% 
#   mutate(parameter = paste(word(parameter, start = 3, sep = fixed("_")), "_temp", sep = "")) %>% 
#   ggplot(aes(x = date, y = temp, colour = parameter)) + 
#   geom_line(linewidth = 1) + 
#   scale_colour_manual(values = c(
#     "mean_temp" = "olivedrab3",
#     "med_temp" = "seagreen3",
#     "max_temp" = "tomato",  
#     "min_temp" = "dodgerblue",
#     "range_temp" = "goldenrod3",
#     "var_temp" = "darkgoldenrod1"
#   )) + 
#   scale_x_continuous(breaks = as.Date(c("2023-01-01", "2023-04-01", "2023-07-01"))) + 
#   ggtitle("Two Weeks") + 
#   labs(y = "Temperature (°C)",
#        x = "") + 
#   theme_bw(base_size = 20) + 
#   theme(panel.grid = element_blank(),
#         axis.text.x = element_text(angle = 270, hjust = 0, vjust = 0.5))
# 
# 
# ### FOUR WEEKS
four_week_temps = get_predictors(daily_values = daily_temp_data,
                                 raw_temp = temp_data,
                                 n_days = 28)

four_week_plot = four_week_temps %>%
  pivot_longer(cols = c(-date),
               names_to = "parameter",
               values_to = "temp") %>%
  filter(parameter %in% c("twenty-eight_day_mean",
                          "twenty-eight_day_med",
                          "twenty-eight_day_max",
                          "twenty-eight_day_min",
                          "twenty-eight_day_var",
                          "twenty-eight_day_range")) %>%
  mutate(parameter = paste(word(parameter, start = 3, sep = fixed("_")), "_temp", sep = "")) %>%
  ggplot(aes(x = date, y = temp, colour = parameter)) +
  geom_line(linewidth = 1) +
  scale_colour_manual(values = c(
    "mean_temp" = "olivedrab3",
    "med_temp" = "seagreen3",
    "max_temp" = "tomato",
    "min_temp" = "dodgerblue",
    "range_temp" = "goldenrod3",
    "var_temp" = "darkgoldenrod1"
  )) +
  scale_x_continuous(breaks = as.Date(c("2023-01-01", "2023-04-01", "2023-07-01"))) +
  ggtitle("Four Weeks") +
  labs(y = "Temperature (°C)",
       x = "") +
  theme_bw(base_size = 20) +
  theme(panel.grid = element_blank(),
        axis.text.x = element_text(angle = 270, hjust = 0, vjust = 0.5))
# 
# 
# ### EIGHT WEEKS
# eight_week_temps = get_predictors(daily_values = daily_temp_data, 
#                                   raw_temp = temp_data, 
#                                   n_days = 56)
# 
# eight_week_plot = eight_week_temps %>% 
#   pivot_longer(cols = c(-date),
#                names_to = "parameter", 
#                values_to = "temp") %>% 
#   filter(parameter %in% c("fifty-six_day_mean",
#                           "fifty-six_day_med",
#                           "fifty-six_day_max", 
#                           "fifty-six_day_min", 
#                           "fifty-six_day_var",
#                           "fifty-six_day_range")) %>% 
#   mutate(parameter = paste(word(parameter, start = 3, sep = fixed("_")), "_temp", sep = "")) %>% 
#   ggplot(aes(x = date, y = temp, colour = parameter)) + 
#   geom_line(linewidth = 1) + 
#   scale_colour_manual(values = c(
#     "mean_temp" = "olivedrab3",
#     "med_temp" = "seagreen3",
#     "max_temp" = "tomato",  
#     "min_temp" = "dodgerblue",
#     "range_temp" = "goldenrod3",
#     "var_temp" = "darkgoldenrod1"
#   )) + 
#   scale_x_continuous(breaks = as.Date(c("2023-01-01", "2023-04-01", "2023-07-01"))) + 
#   ggtitle("Eight Weeks") + 
#   labs(y = "Temperature (°C)",
#        x = "") + 
#   theme_bw(base_size = 20) + 
#   theme(panel.grid = element_blank(),
#         axis.text.x = element_text(angle = 270, hjust = 0, vjust = 0.5))
# 
ggarrange(daily_plot, week_plot, four_week_plot, 
          common.legend = T, nrow = 1, labels = "AUTO", legend = "bottom")
```

```{r misc-corr-periods, include = F}
#The different time periods examined by this climate data highlights that the relationship between minimum and maximum temperatures changes based on the window examined. For example, minimum and maximum temperatures experienced over weekly intervals are closely linked, whereas there is a distinct seasonal cycle in the relationship between minimum and maximum temperatures experienced over periods of four weeks. 

one_week_doy_data = week_temps %>% 
  mutate(doy = yday(date))

one_week_temp_circle = ggplot(one_week_doy_data, aes(x = seven_day_mean_max, y = seven_day_mean_min, colour = doy)) + 
  geom_point() + 
  scale_colour_gradient2(
    high = "dodgerblue4",
    mid = "coral2",
    low = "dodgerblue4",
    midpoint = 182.5) + 
  labs(x = "Max. Temp. (°C)",
       y = "Min. Temp. (°C)") + 
  labs(x = "Max. Temp. (°C)",
       y = "Min. Temp. (°C)") + 
  ggtitle("One Week") + 
  theme_matt()

four_week_doy_data = four_week_temps %>% 
  mutate(doy = yday(date))

four_week_temp_circle = ggplot(four_week_doy_data, aes(x = `twenty-eight_day_max`, y = `twenty-eight_day_min`, colour = doy)) + 
  geom_point() + 
  scale_colour_gradient2(
    high = "dodgerblue4",
    mid = "coral2",
    low = "dodgerblue4",
    midpoint = 182.5) + 
  labs(x = "Max. Temp. (°C)",
       y = "Min. Temp. (°C)") + 
  ggtitle("Four Week") + 
  theme_matt()

ggarrange(one_week_temp_circle, four_week_temp_circle,
          common.legend = T, labels = "AUTO", legend = "bottom")
```

Organisms are unlikely to acclimate instantaneously to changes in temperature. To explore the potential temporal window these copepods are responding to, we examined the correlation between thermal limits and summaries of the thermal environment over different periods of time. For each species (inclusive of all sexes and stages), we examined the correlation between CTmax and one of nine representations of the thermal environment calculated for periods of time from 1 to 60 days before collection. These parameters include the overall maximum, minimum, median, and mean temperature for the period of time, the temperature range and variance during this time, and the mean daily temperature maximum, minimum, and range. We also examined the correlation between CTmax and the temperature recorded at the time of collection. 

```{r estimating-predictor-correlations, include = F}
#We can see that, in general, copepods are responding to proximate cues from the thermal environment, with correlations generally dropping off substantially as acclimation window duration increases. An exception is *Epischura lacustris*, which appears to be responding to maximum temperatures experienced over a 20 day time period. 

### Pulling predictors and measuring correlations for much finer timescales; 1-56 days

num_colls = full_data %>% 
  filter(sex == "female") %>% 
  select(collection_date, sp_name) %>%  
  distinct() %>%  
  count(sp_name) %>% 
  filter(n >= 5)

corr_vals = data.frame()

dur_vals = c(1:50)
for(i in dur_vals){
  
  duration_temps = get_predictors(daily_values = daily_temp_data, 
                                  raw_temp = select(temp_data, -hour), 
                                  n_days = i) %>% 
    filter(date %in% as_date(unique(full_data$collection_date)))
  
  corr_data = full_data %>%
    select(sp_name, collection_date, collection_temp, sex, ctmax) %>% 
    filter(sp_name %in% num_colls$sp_name) %>% 
    #filter(sex == "female") %>% 
    mutate(collection_date = as.Date(collection_date)) %>% 
    inner_join(duration_temps, join_by(collection_date == date)) %>% 
    pivot_longer(cols = c(collection_temp, contains("day_")),
                 values_to = "value", 
                 names_to = "predictor") %>%  
    group_by(sp_name, predictor) %>% 
    summarise(correlation = cor.test(ctmax, value)$estimate,
              p.value = cor.test(ctmax, value)$p.value,
              ci_low = cor.test(ctmax, value)$conf.int[1],
              ci_high = cor.test(ctmax, value)$conf.int[2],
              .groups = "keep") %>% 
    filter(predictor != "collection_temp") %>% 
    mutate(sig = ifelse(p.value <0.05, "Sig.", "Non Sig.")) %>% 
    separate(predictor, "_day_", into = c(NA, "parameter")) %>% 
    mutate(duration = i)
  
  corr_vals = bind_rows(corr_vals, corr_data)
}

coll_corr = full_data %>%
  filter(sp_name %in% num_colls$sp_name) %>% 
  filter(sex == "female") %>% 
  group_by(sp_name) %>% 
  summarise(correlation = cor.test(ctmax, collection_temp)$estimate,
            p.value = cor.test(ctmax, collection_temp)$p.value,
            ci_low = cor.test(ctmax, collection_temp)$conf.int[1],
            ci_high = cor.test(ctmax, collection_temp)$conf.int[2]) %>% 
  mutate(sig = ifelse(p.value <0.05, "Sig.", "Non Sig.")) %>% 
  mutate(duration = 0,
         parameter = "coll_temp")

corr_vals = corr_vals %>%  
  mutate(duration = as.numeric(duration)) %>% 
  bind_rows(coll_corr)

```

Shown below are the correlation coefficients for these relationships. Each facet shows the relationship for a different parameter, plotted against the duration of the time period before collection. 

```{r supp-fig-correlation-durations, fig.width=14, fig.height=8, include = T}
corr_vals %>% 
  mutate(parameter = fct_relevel(parameter, c("min", "max", "range",
                                              "mean", "med", "var",
                                              "mean_min", "mean_max", "mean_range"))) %>% 
  ggplot(aes(x = duration, y = correlation, colour = sp_name)) + 
  facet_wrap(.~parameter) + 
  geom_hline(yintercept = 0) + 
  geom_point(size = 0.9) + 
  geom_line(linewidth = 1.5) + 
  scale_colour_manual(values = species_cols) + 
  labs(x = "Duration (days)",
       y = "Correlation", 
       colour = "Species") + 
  theme_matt_facets()
```

This table contains the top three factors for each species (based on correlation coefficient).

```{r predictor-correlations, include = F}
corr_vals %>%  
  filter(sig == "Sig.") %>% 
  drop_na(correlation) %>% 
  group_by(sp_name) %>%
  arrange(desc(correlation)) %>% 
  slice_head(n = 3) %>% 
  select("Species" = sp_name, "Predictor" = parameter, "Duration" = duration, "Correlation" = correlation, "P-Value" = p.value) %>% 
  knitr::kable(align = "c")
```

Shown here is a graphical summary of the duration of the best predictors for each species. Note that for the two Leptodiaptomids, collection temperature had the largest correlation coefficient so duration is zero. This representation highlights that there is variation across the community not only in the potential driver (e.g. minimum vs. maximum temperatures) but also in the duration of time. This variation is not grouped by season (the winter and summer communities both have representative species apparently responding to short and long durations). 

```{r main-fig-acc-durations}
duration_plot = corr_vals %>%  
  filter(sig == "Sig.") %>% 
  drop_na(correlation) %>% 
  group_by(sp_name) %>%
  arrange(desc(correlation)) %>% 
  slice_head(n = 1) %>% 
  ungroup() %>% 
  mutate("num" = row_number(), 
         sp_name = fct_reorder(sp_name, duration, .fun = mean, .desc = T)) %>% 
  arrange(sp_name) %>% 
  select("Species" = sp_name, "Predictor" = parameter, "Duration" = duration, "Correlation" = correlation, num) %>% 
  ggplot(aes(x = Species, y = Duration, fill = Predictor, group = num)) + 
  geom_bar(stat = "identity", width = 0.5, position = position_dodge(width = 0.6),
           colour = "black") + 
  scale_fill_manual(values = c("coll_temp" = "black", "max" = "white", "min" = "grey")) + 
  labs(x = "", 
       y = "Duration \n(days)") + 
  theme_matt() + 
  theme(axis.text.x = element_blank())
```

```{r main-fig-acc-correlations, fig.width=6, fig.height=8}
correlation_coef_plot = corr_vals %>%  
  filter(sig == "Sig." | parameter == "coll_temp") %>% 
  drop_na(correlation) %>% 
  group_by(sp_name) %>%
  filter(parameter == "coll_temp" | correlation == max(correlation)) %>% 
  arrange(sp_name, parameter) %>% 
  mutate("num" = row_number()) %>% 
  select("Species" = sp_name, "Predictor" = parameter, "Duration" = duration, "Correlation" = correlation, num) %>% 
  mutate(Predictor = if_else(Predictor == "coll_temp", Predictor, "best")) %>% 
  ungroup() %>% 
  mutate(Species = fct_reorder(Species, Duration, .fun = max, .desc = T)) %>% 
  ggplot(aes(x = Species, y = Correlation, fill = Predictor, group = num)) + 
  geom_bar(stat = "identity", width = 0.5, position = position_dodge(width = 0.6),
           colour = "black") + 
  labs(y = "Correlation \nCoefficient",
       fill = "Correlate") +
  scale_fill_manual(values = c("coll_temp" = "black", "best" = "white")) + 
  scale_y_continuous(breaks = c(0, 1), limits = c(0,1)) +
  theme_matt() + 
  theme(axis.text.x = element_text(angle = 300, hjust = 0, vjust = 0.5))

ggarrange(duration_plot, correlation_coef_plot, nrow = 2, legend = "right", labels = "AUTO",
          heights = c(0.4, 0.6))
```

```{r misc-acc-duration-plot, fig.width=7, fig.height=4, include = F}
# Phenotypic variation (like acclimation of thermal limits) is a physiological process. depending on the mechanistic underpinnings (changes in HSP expression, etc.), the amount of time it takes for an individual to acclimate may vary based on body size (larger species, more cells, more time required to acclimate). Shown here is the duration of the environmental acclimation window the copepods appear to be responding to.  

mean_sizes = full_data %>% 
  filter(sex == "female") %>% 
  group_by(sp_name) %>%  
  summarise(mean_size = mean(size, na.rm = T))

corr_vals %>% 
  group_by(sp_name) %>% 
  filter(correlation == max(correlation)) %>%  
  inner_join(mean_sizes, by = "sp_name") %>% 
  select(sp_name, duration, mean_size) %>%  
  ggplot(aes(x = mean_size, y = duration)) + 
  geom_point(aes(colour = sp_name), 
             size = 4) + 
  scale_colour_manual(values = species_cols) + 
  labs(x = "Mean Female Size (mm)",
       y = "Acclimation Duration",
       colour = "Species") + 
  theme_matt() + 
  theme(legend.position = "right")
```

## Trait Variation 

Shown below are the clutch size distributions for the three diaptomiid species, which produce egg sacs that allow for easy quantification of fecundity. 

```{r supp-fig-fecundity-histogram, fig.width=7, fig.height=10}
full_data %>%  
  drop_na(fecundity) %>%  
  ggplot(aes(x = fecundity, fill = sp_name_sub)) + 
  facet_wrap(.~sp_name_sub, ncol = 1) + 
  geom_histogram(binwidth = 2) + 
  scale_fill_manual(values = species_cols) + 
  labs(x = "Fecundity (# Eggs)") +
  theme_matt_facets() + 
  theme(legend.position = "none")
```

One of the main aims of this project is to examine the patterns and processes driving variation in upper thermal limits across these species of copepods. 

### Variation with temperature 

We expect one of the primary drivers of copepod thermal limits to be temperature, as individuals acclimate to seasonal changes. Shown below are the seasonal patterns of when copepods were included in CTmax measurements (a proxy for the season of occurrence), and thermal limits for each species plotted against the temperature at the time of collection. We generally see an increase in thermal limits with increasing collection temperature.

```{r fig.width=7, fig.height=5}
sp_ctmax_temp = full_data %>% 
  filter(sp_name != "Osphranticum labronectum") %>% 
  mutate(sp_name = as.factor(sp_name),
         sp_name = fct_reorder(sp_name, ctmax, .desc = T)) %>% 
  ggplot(aes(x = collection_temp, y = ctmax, colour = sp_name)) + 
  facet_wrap(sp_name~.) + 
  geom_smooth(method = "lm", se = F, linewidth = 1.5, colour = "grey30") + 
  geom_point(size = 2, alpha = 0.4) + 
  labs(x = "Collection Temp. (°C)", 
       y = "CTmax (°C)") + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "none")
```

The interaction between seasonal changes in temperature and the acclimation of thermal limits likely affects vulnerability of each species to warming. Shown below are warming tolerance values for each species, calculated as the difference between individual CTmax and the temperature at the time of collection. All species maintained some degree of buffer between environmental temperatures and upper thermal limits, but *L. minutus* appears to approach its upper thermal limit during the warmest collections during the summer. 

Also shown below is the relationship between fecundity (the number of eggs contained in a clutch) for the three diaptomid species. For the two Leptodiaptomus species, there is no relationship between clutch size and temperature, while there appears to be a general increase in clutch size with temperature in the Skistodiaptomus species. 

```{r main-fig-trait-coll-temp-plots, fig.width=15, fig.height=7}

wt_temp = full_data %>% 
  filter(sp_name != "Osphranticum labronectum") %>% 
  ggplot(aes(x = collection_temp, y = warming_tol, colour = sp_name)) + 
  geom_point(size = 3,
             alpha = 0.3) + 
  geom_smooth(method = "lm", linewidth = 3) +
  labs(x = "Collection Temperature (°C)", 
       y = "Warming Tolerance (°C)",
       colour = "Species")  + 
  ylim(0,30) + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "none")

eggs_temp = full_data %>% 
  filter(sp_name != "Osphranticum labronectum") %>% 
  ggplot(aes(x = collection_temp, y = fecundity, colour = sp_name)) + 
  geom_point(size = 3,
             alpha = 0.3) + 
  geom_smooth(method = "lm", linewidth = 3) +
  labs(x = "Collection Temperature (°C)", 
       y = "Fecundity (# Eggs)",
       colour = "Species")  + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "right")

ggarrange(wt_temp, eggs_temp, 
          common.legend = T, legend = "right", labels = "AUTO")
```

```{r supp-fig-lsic-morphs, include = F}
morph_data = full_data %>% 
  filter(sex == "female" & species == "leptodiaptomus_sicilis") %>%  mutate(sp_name = case_when(
    sp_name == "Leptodiaptomus sicilis" & size >= 0.89 ~ "Large",
    sp_name == "Leptodiaptomus sicilis" & size < 0.89 ~ "Small",
    .default = sp_name
  ))

ggplot(morph_data, aes(x = collection_temp, y = ctmax, colour = sp_name)) + 
  geom_point(size = 2, alpha = 0.8) + 
  geom_smooth(method = "lm", se = T, linewidth = 2) + 
  labs(x = "Collection Temp. (°C)", 
       y = "CTmax (°C)") + 
  theme_matt() + 
  theme(legend.position = "none")
```

```{r misc-ctmax-range-plot, include = F}
full_data %>%  
  filter(sex == "female") %>% 
  group_by(sp_name, collection_date, collection_temp) %>%  
  summarise("ctmax_range" = max(ctmax) - min(ctmax),
            "ctmax_var" = var(ctmax),
            "sample_size" = n()) %>% 
  ungroup() %>% 
  group_by(sp_name) %>% 
  filter(sp_name != "Leptodora kindti") %>% 
  filter(sample_size > 3) %>% 
  ggplot(aes(x = collection_temp, y = ctmax_var, colour = sp_name)) + 
  facet_wrap(sp_name~.) + 
  geom_point() + 
  geom_smooth(method = "lm", colour = "black") + 
  scale_colour_manual(values = species_cols) + 
  theme_matt_facets()
```

```{r ctmax-coll-temp-model, include = F}
# adult_data = full_data %>% 
#   filter(sex == "female")

# 
# ctmax_temp.model = lm(data = model_data, ctmax ~ temp_cent * sp_name)
# size_temp.model = lm(data = model_data, size ~ temp_cent * sp_name)
# fecund_temp.model = lm(data = drop_na(model_data, fecundity), fecundity ~ temp_cent * sp_name)
# 
# fecundity_resids = cbind(drop_na(model_data, fecundity), "fecundity_resids" = fecund_temp.model$residuals) %>% 
#   select(collection_date, exp_date, replicate, species, tube, fecundity_resids)
# 
# ctmax_resids = cbind(model_data, "resids" = ctmax_temp.model$residuals, "size_resids" = size_temp.model$residuals) %>% 
#   left_join(fecundity_resids)

```

```{r supp-fig-ctmax-time-in-lab, fig.width=15, fig.height=10}
# Copepods spent several days in lab during experiments. Shown below are the CTmax residuals (taken from a model of CTmax against collection temperature) plotted against the time spent in lab before measurements were made. Individual regressions are shown for the residuals against days in lab for each collection. We can see clearly that thermal limits are fairly stable over time. 


# ggplot(ctmax_resids, aes(x = days_in_lab, y = resids, colour = sp_name, group = collection_date)) + 
#   facet_wrap(sp_name~.) + 
#   geom_point(size = 4, alpha = 0.5) + 
#   geom_smooth(method = "lm", se = F, linewidth = 1) + 
#   #scale_x_continuous(breaks = c(0:5)) + 
#   labs(x = "Days in lab", 
#        y = "CTmax Residuals") + 
#   scale_colour_manual(values = species_cols) + 
#   theme_matt_facets() + 
#   theme(legend.position = "none")
```

```{r supp-fig-model-performance, fig.width=9, fig.height=13}

model_data = full_data %>%  
  drop_na(size, ctmax) %>%  
  filter(sp_name != "Osphranticum labronectum") %>% 
  mutate(temp_cent = scale(collection_temp, center = T, scale = F),
         size_cent = scale(size, center = T, scale = F))

minimal.model = lme4::lmer(data = model_data,
                           ctmax ~ sp_name + sex + temp_cent +
                             (1|days_in_lab))

full.model = lme4::lmer(data = model_data,
                        ctmax ~ sp_name*sex*temp_cent +
                          (1|days_in_lab))

drop1(full.model, test = "Chisq")

performance::test_performance(minimal.model, full.model)
performance::check_model(full.model)

car::Anova(full.model, type = "III")

sp_ctmax = emmeans::emmeans(full.model, specs = "sp_name") %>% 
  data.frame() %>% 
  select(sp_name, "species_ctmax" = emmean)

model_coefs = emmeans::emtrends(full.model, var = "temp_cent", specs = "sp_name") %>% 
  data.frame() %>% 
  inner_join(sp_ctmax) 

ctmax_resids = model_data %>% 
  mutate(resids = residuals(full.model))

#write.csv(model_coefs, "Output/Data/ARR_data.csv")
```

```{r, include = F}

morph_comp = full_data %>% 
  filter(species == "leptodiaptomus_sicilis") %>% 
   mutate(morph = case_when(
    size >= 0.89 ~ "Large",
    size < 0.89 ~ "Small")) %>% 
  select(collection_date, collection_temp, ctmax, size, morph)

morph.model = lm(data = morph_comp, 
                 ctmax ~ morph * collection_temp)

#performance::check_model(morph.model)

car::Anova(morph.model)

emmeans::emmeans(morph.model, specs = "morph", at = list(collection_temp = 5)) %>% pairs()

# Individual size is strongly impacted by developmental temperature. The presence of two morphs may reflect two different generations with differing developmental conditions. The large morph is present during the Fall while the smaller morph is present during the Winter and Spring. Individuals that are mature during the Fall were presumably born during the previous Winter/Spring, largely developing during the coldest time of year. These individuals reappear in the community in the late Fall, when water temperatures are still relatively warm - offspring produced by the large morphs during the Fall mature under these warmer conditions, producing the smaller morph. The effects of these developmental differences can also be seen in the thermal limit data - the smaller morph (which developed at higher temperatures) tended to have higher upper thermal limits than the larger morph (which developed at cooler temperatures). This suggests a layered impact of developmental phenotypic plasticity and acclimation, which together produce a 'bump' in L. sicilis CTmax during the coldest period of time. During the Fall and early winter, CTmax measurements were made on individuals from the large morph, with acclimation to cooling driving a decrease in CTmax. Eventually, individuals from the small morph mature and replace the previous generation. This results in an apparent increase in CTmax just before the coldest time of year, driven by the effects of developmental plasticity. Surface waters continue to cool, with acclimation then driving a small decrease in CTmax. 

```


```{r main-fig-ARR-synth-plot, fig.width=9, fig.height=6}
arr_combined = synth_arr %>%
  filter(measure == "upper" & mean_lim > 20) %>% 
  select("group" = genus, arr, mean_lim) %>% 
  mutate("dataset" = "synthesis") %>% 
  bind_rows(
    select(model_coefs, "group" = sp_name, 'arr' = temp_cent.trend, 'mean_lim' = species_ctmax)
  ) %>% 
  mutate(dataset = if_else(is.na(dataset), "new data", "synthesis"),
         group = fct_reorder(group, arr, .desc = T))


ggplot(arr_combined, aes(x = mean_lim, y = arr)) + 
  geom_smooth(method = "lm", se = F, 
              linewidth = 2, colour = "grey30") + 
  geom_point(data = filter(arr_combined, dataset != "new data"), 
             size = 3, colour = "black", shape = 1, stroke = 1.4) + 
  geom_point(data = filter(arr_combined, dataset == "new data"),
             aes(colour = group), 
             size = 4.5) + 
  scale_colour_manual(values = species_cols) + 
  labs(x = "Thermal Limit", 
       y = "ARR", 
       colour = "Species") +
  theme_matt() + 
  theme(legend.position = "right")

```

### Sex and stage variation in thermal limits 
Previous sections have generally lumped juvenile, female, and male individuals together. There may be important stage- or sex-specific differences in CTmax though. For all species but Osphranticum, we have measurements for individuals in different stages and of different sexes. 

```{r sex-stage-table}
sex_sample_sizes = full_data %>%  
  group_by(sp_name, sex) %>%  
  summarise(num = n()) %>%  
  pivot_wider(id_cols = sp_name,
              names_from = sex, 
              values_from = num,
              values_fill = 0) %>% 
  select("Species" = sp_name, "Juvenile" = juvenile, "Female" = female, "Male" = male)

knitr::kable(sex_sample_sizes, align = "c")
```

Across group comparisons show that there are generally no differences in thermal limits (represented here as the residuals from a CTmax ~ collection_temp x species linear regression), with the exception of Senecella males, which may have lower thermal limits (although sample sizes are very small in this group).

```{r supp-fig-ctmax-sex, fig.width=15, fig.height=8}
# ctmax_resids %>% 
#   filter(sp_name != "Osphranticum labronectum") %>% 
#   ggplot(aes(x = sex, y = resids, colour = sp_name)) + 
#   facet_wrap(sp_name~.) + 
#   geom_jitter(width = 0.1, alpha = 0.5) + 
#   geom_boxplot(width = 0.4, fill = NA, colour = "black", 
#                linewidth = 1, outlier.colour = NA) + 
#   scale_colour_manual(values = species_cols) + 
#   theme_matt_facets()
```

```{r trait-variance-coll-temp, include = F}
# 
# Given the long generation times of these copepods, decreases in trait variance may indicate selection over the seasonal cycle. Shown below are the variance in observed CTmax and size, plotted against collection date. Variance decreases in *Skistodiaptomus*, but this pattern is driven by a single collection with high variance early in the year. Size variance increases slightly in *Skistodiaptomus*. Variance in both CTmax and size is fairly constant in *Leptodiaptomus minutus*, the only other species collected across the entire set of samples thus far. 
# 
# ggplot(drop_na(adult_summaries, ctmax_var), aes(x = as.Date(collection_date), y = ctmax_var, colour = sp_name)) + 
#   facet_wrap(sp_name~., scales = "free_y") + 
#   geom_point(size = 2) + 
#   geom_smooth(se = F) + 
#   labs(x = "Collection Temp. (°C)", 
#        y = "CTmax Variance") + 
#   scale_colour_manual(values = species_cols) + 
#   theme_matt_facets() + 
#   theme(legend.position = "none")
# 
# ggplot(drop_na(adult_summaries, size_var), aes(x = as.Date(collection_date), y = size_var, colour = sp_name)) + 
#   facet_wrap(sp_name~.) + 
#   geom_point(size = 2) + 
#   geom_smooth(se = F) + 
#   labs(x = "Collection Temp. (°C)", 
#        y = "Size Variance") + 
#   scale_colour_manual(values = species_cols) + 
#   theme_matt_facets() + 
#   theme(legend.position = "none")
```

```{r supp-fig-model2-performance, fig.width=9, fig.height=13}
model2_data = model_data %>% 
  filter(sex == "female", 
         pathogen != "uncertain", 
         dev_eggs != "uncertain", 
         lipids != "uncertain") %>% 
  mutate(pathogens = fct_relevel(pathogen, "no", "spot", "cloudy", "other"))

other_factor_model = lmer(data = model2_data, 
                          ctmax~sp_name * collection_temp + dev_eggs + pathogen + lipids + (1|days_in_lab))

drop1(other_factor_model, scope = ~., test = "Chisq")

reduced_factors_model = lmer(data = model2_data, 
                          ctmax~sp_name * collection_temp + pathogen + (1|days_in_lab))

performance::check_model(reduced_factors_model)

car::Anova(reduced_factors_model, type = "III")
```

```{r supp-fig-pathogen-effect, fig.width=8, fig.height=5}
emmeans::emmeans(reduced_factors_model, spec = "pathogen") %>% emmeans::contrast(method="trt.vs.ctrl",ref="no") %>% plot() + 
  geom_vline(xintercept = 0) + 
  labs(x = "Difference (°C)", 
       y = "Comparison") + 
    theme_matt()
```



### Trait Correlations and Trade-offs

A relationship between size and upper thermal limits has been suggested in a wide range of other taxa. Shown below are the measured upper thermal limits plotted against prosome length. The overall relationship (inclusive of all species) is shown as the black line in the background. Regressions for each individual species are also shown. Across the entire assemblage, there is a strong decrease in thermal limits with increasing size.  

```{r misc-ctmax-size, fig.width=10, fig.height=7}
full_data %>% 
  #filter(sex == "female") %>%  
  ggplot( aes(x = size, y = ctmax, colour = sp_name)) + 
  geom_point(size = 2, alpha = 0.3) + 
  geom_smooth(data = full_data, 
              aes(x = size, y = ctmax),
              method = "lm", 
              colour ="black", 
              linewidth = 2.5) + 
  labs(x = "Length (mm)", 
       y = "CTmax (°C)",
       colour = "Species") + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "right")

```

Shown here is the relationship for each species individually. 

```{r misc-ind-sp-ctmax-size, fig.width=9, fig.height=6}
full_data %>% 
  #filter(sex == "female") %>%  
  group_by(sp_name) %>% filter(n() >2) %>% filter(!str_detect(sp_name, pattern = "kindti")) %>% 
  ggplot( aes(x = size, y = ctmax, colour = sp_name)) + 
  facet_wrap(sp_name~., scales = "free", nrow = 2) + 
  geom_point(size = 2, alpha = 0.8) + 
  geom_smooth(method = "lm", se = F, linewidth = 2) + 
  labs(x = "Length (mm)", 
       y = "CTmax (°C)",
       colour = "Species") + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "none")
```

Shown below is the relationship between mean size and mean thermal limits for females of each species. We see that larger species within the community tend to have a lower thermal limit than smaller species. 

```{r main-fig-mean-ctmax-mean-size-plot, fig.width=9, fig.height=5}
full_data %>% 
  group_by(sp_name, sex) %>% 
  summarize(mean_ctmax = mean(ctmax, na.rm = T),
            mean_size = mean(size, na.rm = T)) %>% 
  #filter(sex == "female") %>% 
  ggplot(aes(x = mean_size, y = mean_ctmax)) + 
  geom_smooth(method = "lm", se = F, linewidth = 2, colour = "black") + 
  geom_point(aes(colour = sp_name, shape = sex),
             size = 5) + 
  labs(x = "Length (mm)", 
       y = "CTmax (°C)",
       colour = "Species") + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "right")
```

```{r ctmaxresids-size, fig.width=10, fig.height=7, include = F}
# ctmax_resids %>% 
#   #filter(sex == "female") %>% 
#   ggplot(aes(x = size, y = resids, colour = sp_name)) + 
#   facet_wrap(sp_name~.) + 
#   geom_point(size = 2) + 
#   geom_smooth(method = "lm", se = F, linewidth = 2) + 
#   labs(x = "Length (mm)", 
#        y = "CTmax (°C)") + 
#   scale_colour_manual(values = species_cols) + 
#   theme_matt() + 
#   theme(legend.position = "none")
```

Shown here is the relationship between fecundity and size, showing the classic pattern of increasing egg production with increasing size. 

```{r fecundity-size}
size_fecund_plot = ctmax_resids %>%  
  drop_na(fecundity) %>% 
  ggplot(aes(x = size, y = fecundity, colour = sp_name)) + 
  geom_smooth(method = "lm", se = F, linewidth = 2) + 
  geom_point(size = 2, alpha = 0.5) + 
  labs(x = "Prosome length (mm)", 
       y = "Fecundity (# Eggs)",
       colour = "Species") + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "right")
```

Individuals may also allocate energy to different fitness related traits, prioritizing reproductive output over environmental tolerance, for example. Shown below is the relationship between CTmax residuals (again, controlling for the effects of collection temperature) against fecundity. We can see clearly that individuals with increased fecundity are not decreasing thermal limits, suggesting that there is no energetic trade-off between these traits. 
```{r, main-fig-fecundity-plots, fig.width=8.5, fig.height=10}
ctmax_fecund_plot = ctmax_resids %>%  
  drop_na(fecundity) %>% 
  ggplot(aes(x = resids, y = fecundity, colour = sp_name)) + 
  geom_smooth(method = "lm", se = F, linewidth = 2) + 
  geom_point(size = 2, alpha = 0.5) + 
  labs(x = "CTmax Residuals (°C)", 
       y = "Fecundity (# Eggs)",
       colour = "Species") + 
  scale_colour_manual(values = species_cols) + 
  theme_matt() + 
  theme(legend.position = "right")

ggarrange(size_fecund_plot, ctmax_fecund_plot, ncol = 1, common.legend = T, labels = "AUTO", legend = "right")
```

Previous studies have shown that the magnitude of the size-fecundity correlation may be environmentally-dependent. This is not visible is the data from these collections. 

```{r supp-fig-fec-size-corr-vs-temp, fig.width = 4, fig.height=10}
corr_data = full_data %>% 
  drop_na(fecundity) %>% 
  filter(sp_name %in% c("Leptodiaptomus sicilis",
                        "Leptodiaptomus minutus", 
                        "Skistodiaptomus sp")) %>%  
  group_by(collection_date, collection_temp, sp_name) %>% 
  summarise(size_fec_corr = cor(size, fecundity),
            n = n(),
            mean_fecundity = mean(fecundity)) %>% 
  filter(n >= 3) %>% 
  ungroup() %>%  
  group_by(sp_name) %>% 
  mutate(temp_cent = scale(collection_temp, scale = F))

ggplot(corr_data, aes(x = temp_cent, y = size_fec_corr, colour = sp_name)) + 
  facet_wrap(sp_name~., nrow = 3) + 
  geom_hline(yintercept = 0) +
  geom_point(size = 3) + 
  geom_smooth(method = "lm", linewidth = 2) + 
  scale_color_manual(values = species_cols) + 
  labs(x = "Temperature (centered)", 
       y = "Correlation Coefficient") + 
  coord_cartesian(ylim = c(-1, 1)) +
  theme_matt_facets() + 
  theme(legend.position = "none")

# ggplot(corr_data, aes(x = size_fec_corr)) +
#     facet_wrap(sp_name~., nrow = 3) +
#   geom_histogram(binwidth = 0.2)
```


## Other patterns in variation 

*Leptodiaptomus sicilis* is the most abundant species during the winter. There was a large shift in the size of mature females towards the end of December. These large and small individuals are the same species (confirmed via COI sequencing), suggesting this shift may instead reflect a transition from one generation to another. This size difference may be caused by differences in the developmental environments. For example, individuals developing in January grow up at very low temperatures, and therefore may reach larger sizes. These individuals over-summer in deep waters, then re-emerge in October and produce a new generation. Water temperatures are still fairly high through November, which results in a generation of smaller individuals. These individuals mature in time to produce a new generation in January. 

```{r supp-fig-lsic-morph-size, fig.width=9, fig.height=9}
full_data %>%  
  filter(sp_name == "Leptodiaptomus sicilis") %>% 
  filter(sex != "juvenile") %>% 
  group_by(collection_date) %>% 
  mutate(size_center = scale(size, center = T, scale = F)) %>% 
  ggplot(aes(y = factor(collection_date), x = size, fill = collection_temp)) + 
  facet_wrap(sex~.) + 
  geom_density_ridges(bandwidth = 0.04) + 
  geom_vline(xintercept = 0.89) + 
  labs(x = "Size (mm)",
       y = "Date", 
       fill = "Coll. Temp. (°C)") + 
  theme_matt() + 
  theme(legend.position = "right",
        axis.text.y = element_text(size = 12))
```

A similar, but less distinct pattern can be observed in L. minutus individuals as well. 
```{r supp-fig-lmin-morph-size, fig.width=9, fig.height=9}
full_data %>%  
  filter(sp_name == "Leptodiaptomus minutus") %>% 
  filter(sex != "juvenile") %>% 
  ggplot(aes(y = factor(collection_date), x = size, fill = collection_temp)) + 
  facet_wrap(sex~.) + 
  geom_density_ridges(bandwidth = 0.04) + 
  geom_vline(xintercept = 0.69) + 
  labs(x = "Size (mm)",
       y = "Date", 
       fill = "Coll. Temp. (°C)") + 
  coord_cartesian(xlim = c(0.5,0.9)) + 
  theme_matt() + 
  theme(legend.position = "right",
        axis.text.y = element_text(size = 12))
```

## Distribution Lag Non-Linear Model (DLNM approach) 

Distributed lag models examine a response variable, measured at multiple time points, as a function of the lagged occurrence of some predictor variable (response y at time t as a function of predictor x(t-lag). This method utilizes a bi-dimensional dose-lag-response function, which essentially examines not only the dose effect, but the effect of the timing of the dose. 


```{r compiling_temp_lags}
# Run this code, save the product, and then just read in the temp lag data object. Takes too long to run each time this document is knit. 

# lag_temps = temp_data %>%
#   group_by(date, hour) %>%
#   summarize("mean_temp" = mean(temp, na.rm = T)) %>%
#   ungroup() %>%
#   mutate(point_num = row_number())
# 
# uniq_days = length(unique(lag_temps$date))
# 
# g = gam(mean_temp ~ s(point_num, bs="cr", k=uniq_days + 10),
#     method = "REML",
#     data = lag_temps)
# 
# points = seq(1, nrow(lag_temps), length.out = length(lag_temps$hour))
# 
# df.res = df.residual(g)
# 
# pred_temps = predict(g, newdata = lag_temps, type = "response", se.fit = TRUE)
# 
# lag_temps = lag_temps %>%
#   mutate(trend_T = pred_temps$fit,
#          trend_se = pred_temps$se.fit,
#          temp_diff = mean_temp - trend_T)
# 
# write.csv(lag_temps, file = "./Output/Data/lag_temps.csv", row.names = F)

```

```{r supp-fig-daily-max-ctmax, fig.width=9, fig.height=4} 

dlnm_data = full_data %>%  
  filter(sex == "female") %>% 
  filter(sp_name %in% c(
    "Leptodiaptomus sicilis",
    "Leptodiaptomus minutus"
  )) %>% 
  select(collection_date, collection_temp, sp_name, ctmax) %>% 
  group_by(collection_date, collection_temp, sp_name) %>%  
  summarise(mean_ctmax = mean(ctmax, na.rm = T),
            sample = n())

temp_data %>% 
  group_by(date) %>% 
  summarise(mean_temp = mean(temp),
            max_temp = max(temp, na.rm = T)) %>% 
  right_join(dlnm_data, by = join_by("date" == "collection_date")) %>% 
  ggplot(aes(x = max_temp, y = mean_ctmax)) + 
  facet_wrap(.~sp_name) + 
  geom_smooth(method = "gam") + 
  geom_point() + 
  labs(x = "Max Daily Temp. (°C)",
       y = "Mean CTmax (°C)") + 
  theme_matt_facets() + 
  theme(strip.text.x = element_text(size = 12))
```

```{r supp-fig-dlnm-plot}

sp_list = unique(dlnm_data$sp_name)

for(lag_species in sp_list){
  
  dlnm_data_sp = dlnm_data %>% 
    filter(sp_name == lag_species)
  
  # We need to estimate a matrix of exposure histories for each observation. This contains the series of exposures at each lag (l) for each of the n observations, constrained between l0 (minimum lag) and L (max lag). 
  
  dates = dlnm_data_sp$collection_date # For each of these dates, make a vector of the past 30 days (including the day of collection). NOTE: Don't use 'unique' dates here since some collections had multiple species
  
  exp_hist_z = data.frame()
  exp_hist_trend = data.frame()
  
  for(d in dates){
    
    history = lag_temps %>% 
      filter(date <= d & date > d - 10) %>% 
      arrange(desc(date), desc(hour)) %>% 
      mutate(lag = row_number() - 1) %>% 
      select(lag, mean_temp, temp_diff)
    
    z_vec = scale(history$mean_temp)[,1]
    names(z_vec) = history$lag
    
    trend_vec = history$temp_diff
    names(trend_vec) = history$lag
    
    exp_hist_z = bind_rows(exp_hist_z, z_vec)
    exp_hist_trend = bind_rows(exp_hist_trend, trend_vec)
    
  }
  
  #print(max(exp_hist_trend, na.rm = T))
  
  # The cross-basis function from dlnm will use the class of the x parameter to determine what to do. In our case, we need to provide it with the matrix of exposure histories for reach observation (row) and lag (column). 
  
  cb_temps = crossbasis(exp_hist_trend, lag = c(0,dim(exp_hist_trend)[2]-1), 
                        argvar =list(fun="cr",df=3), 
                        arglag=list(fun="cr",df=3,intercept=T))
  
  #summary(cb_temps)
  
  penalized_mat <- cbPen(cb_temps)
  
  #fitting GAM
  lag.gam = gam(data = dlnm_data_sp, 
                mean_ctmax ~ collection_temp + cb_temps, 
                method = "GCV.Cp",
                paraPen=list(cb_temps=penalized_mat))
  
  # summary(lag.gam)
  # AIC(lag.gam)
  
  #estimation of exposures effects
  
  #default plots
  pred_gam_Zs<-crosspred(cb_temps, lag.gam, 
                         cumul=F, cen=0, ci.level = 0.95,
                         at=seq(-4,4, 0.1))
  
  plot(pred_gam_Zs, "contour", main = lag_species, 
              xlab = "Temperature Deviation (°C)", 
              ylab = "Hours before collection")
  
  
  # 
  # plot(pred_gam_Zs, border = 2, cumul=F,
  #       theta=110,phi=20,ltheta=-80)
  
  # plot(pred_gam_Zs, "slices",
  #      var = c(3,-3),
  #      lag = c(1,200),
  #      col = 2)
  # 
}


```


## Miscellany


```{r supp-fig-ramp-rate, fig.width = 7, fig.height=5}
run_starts = temp_record %>% 
  group_by(run) %>% 
  filter(minute_passed <= 3) %>% 
  summarise(start_temp = mean(temp_C)) %>% 
  mutate("temp_bin" = cut_number(start_temp, 4),
         temp_bin = case_when(
           temp_bin == "[2.7,9.26]" ~ "[2.7°C - 9.26°C]",
           temp_bin == "(9.26,13.9]" ~ "[9.26°C - 13.9°C]",
           temp_bin == "(13.9,21]" ~ "[13.9°C - 21°C]",
           temp_bin == "(21,30.5]" ~ "[21°C - 30.5°C]"
         ))

ramp_record2 = ramp_record %>% 
  group_by(run, minute_interval) %>% 
  summarise(mean_ramp = mean(ramp_per_minute)) %>% 
  ungroup() %>% 
  left_join(run_starts) %>% 
  mutate(temp_bin = fct_reorder(temp_bin, start_temp, .fun = mean))

ggplot(ramp_record2, aes(x = minute_interval, y = mean_ramp)) + 
  facet_wrap(temp_bin~.) + 
  geom_hline(yintercept = 0.3, colour = "grey") + 
  geom_hline(yintercept = 0.1, colour = "grey") + 
  geom_hex(aes(fill = cut(..count.., c(2, 5, 10, 20, 30, 40, 50))),
           bins = 30) + 
  scale_fill_viridis_d(name="Number of Observations",
                       labels = c("<5", "5-9", "10-19", "20-29", "30-39", "40-50"),
                       option = "mako") + 
  labs(y = "Ramp Rate (deg. C / min.)",
       x = "Time into run (minute)") + 
  theme_matt_facets(base_size = 12)
```






```{r}

if(predict_vuln == F){

  knitr::knit_exit()

}

```

## Hindcasting vulnerability with different acclimation scenarios
Using the observed thermal limit data, we can produce a hindcast of thermal stress for Lake Champlain copepods. For these initial assays, we will define thermal stress as any time when maximum daily water temperature is within 5°C of copepod CTmax or higher (i.e. warming tolerance is less than 5°C). We will use three different scenarios: 1) each species has a unique but fixed thermal limit (average measured CTmax), 2) species have unique thermal limits and are able to acclimate (constant ARR across all species used to predict CTmax based on average daily temperatures), and 3) species have unique thermal limits and species-specific acclimation (CTmax predicted using whichever environmental factor and duration is the strongest candidate for driving acclimation - from the correlation analysis). In all cases, data is filtered to just thermal limits of adult females. 

### Scenario 1
```{r}
mean_ctmax = full_data %>% 
  filter(sex == "female") %>%  
  group_by(sp_name) %>% 
  summarize("mean_ctmax" = mean(ctmax)) %>% 
  arrange(mean_ctmax)

knitr::kable(mean_ctmax)
```

```{r}
# # Constructs the URL for the full temperature data set; RUN THIS ONCE
# hind_url = constructNWISURL(siteNumbers = siteNumber, parameterCd = parameterCd, service = "uv")
# 
# hind_temp_data = importWaterML1(hind_url, asDateTime = T) %>%
#   mutate("date" = as.Date(dateTime)) %>%
#   select(date, "temp" = X_00010_00000)
# 
# write.table(x = hind_temp_data, file = "hindcast_temps.csv", row.names = F, sep = ",")
```

```{r}
# ggplot(hind_temp_data, aes(x = date, y = temp)) + 
#   geom_line(linewidth = 0.1) + 
#   labs(x = "Date", 
#        y = "Water Temperature (°C)") +
#   theme_matt()
```

In the simplest scenario, species thermal limits are static through time, represented by the average CTmax of adult female copepods. In this scenario, only three of the seven observed species are exposed to thermal stress (temperatures within 5°C of CTmax). Temperatures approached the thermal limit of *Leptodiaptomus sicilis* on a handful of days. By contrast, *Senecella calanoides* and *Limnocalanus macrurus* were both exposed to substantial thermal stress throughout a large portion of the year, likely explaining why these species are absent from the community for the summer and fall periods. 

```{r supp-fig-hind1_scenario, fig.width=10, fig.height=5}
hind1_data = hind_temp_data %>% 
  group_by(date) %>% 
  summarize("daily_max" = max(temp),
            "daily_mean" = mean(temp),) %>% 
  bind_cols(pivot_wider(mean_ctmax, names_from = sp_name, values_from = mean_ctmax)) %>%  
  pivot_longer(cols = c(-date, -daily_max, -daily_mean),
               names_to = "species", 
               values_to = "mean_ctmax") %>%  
  mutate(lim_diff = mean_ctmax - daily_max) %>%  
  mutate(doy = yday(date),
         "method" = "No_acclimation")

hind_daily_temp_data = hind_temp_data %>%
  ungroup() %>% 
  group_by(date) %>% 
  summarise(mean_temp = mean(temp),
            med_temp = median(temp),
            var_temp = var(temp), 
            min_temp = min(temp), 
            max_temp = max(temp)) %>% 
  mutate("range_temp" = max_temp - min_temp)

#table(hind1_data$species)

hind1_data %>% 
  filter(lim_diff <= 5) %>%  
  ggplot(aes(x = doy, y = lim_diff, colour = species)) +
  geom_hline(yintercept = 0) + 
  geom_hline(yintercept = 5, 
             colour = "grey") + 
  geom_point(alpha = 0.5) +
  geom_smooth(se = F) + 
  labs(x = "Day of Year", 
       y = "Predicted Warming Tolerance \n(°C Above Daily Max)") + 
  theme_matt() + 
  theme(legend.position = "right")
```

### Scenario 2
In the second scenario, thermal limits vary within and between species. A simple model is used to predict species thermal limits based on mean daily temperature (CTmax as a function of species and collection temperature, but without the interaction between these two factors). These predicted thermal limits are then compared against the maximum daily temperature to estimate thermal stress, as in Scenario 1. Including this simple form of acclimation in the model reduced the degree of thermal stress for each species, eliminating it entirely for *Leptodiaptomus sicilis*. Note that the magnitude of the predicted stress is  low enough that removing the 5°C buffer around the predicted thermal limits would actually limit predicted thermal stress to just a few days for *Senecella calanoides*. 

```{r supp-fig-hind2_scenario, fig.height=5, fig.width=10}
hindcast_model1 = lm(data = filter(full_data, sex == "female"),
                     ctmax ~ collection_temp + sp_name)

hind2_data = hind_temp_data %>% 
  group_by(date) %>% 
  summarize("collection_temp" = mean(temp),
            "daily_max" = max(temp)) %>% 
  bind_cols(
    pivot_wider(mean_ctmax, 
                names_from = sp_name, 
                values_from = mean_ctmax)) %>% 
  pivot_longer(cols = c(-date, -daily_max, -collection_temp),
               names_to = "sp_name", 
               values_to = "mean_ctmax") %>% 
  select(-mean_ctmax) %>% 
  mutate("pred_ctmax" = predict.lm (hindcast_model1, newdata = .)) %>% 
  select(date, "daily_mean" = collection_temp, daily_max, "species" = sp_name, pred_ctmax) %>% 
  mutate(lim_diff = pred_ctmax - daily_max) %>% 
  #filter(lim_diff <= 0) %>%  
  mutate(doy = yday(date),
         "method" = "Constant_acclimation")

# ggplot(hind2_data, aes(x = daily_mean, y = pred_ctmax, colour = species)) +
#   geom_smooth(method = "lm") 

# table(hind2_data$species)
hind2_data %>%  
  filter(lim_diff <= 5) %>%  
  ggplot(aes(x = doy, y = lim_diff, colour = species)) +
  geom_hline(yintercept = 0) + 
  geom_hline(yintercept = 5, 
             colour = "grey") + 
  geom_point(alpha = 0.5) +
  geom_smooth() + 
  labs(x = "Day of Year", 
       y = "Predicted Warming Tolerance \n(°C Above Daily Max)") + 
  theme_matt() + 
  theme(legend.position = "right")
```

### Scenario 3
The final scenario allows the environmental variable used to predict CTmax to vary between species. For species observed in fewer than 5 collections, we use the same approach as in Scenario 2. For species observed in more than 5 collections, however, the factor with the strongest correlation with CTmax is used to predict thermal limits. These factors are included below.

```{r}
hind_preds = corr_vals %>%  
  filter(sig == "Sig.") %>% 
  drop_na(correlation) %>% 
  group_by(sp_name) %>%
  arrange(desc(correlation)) %>% 
  slice_head(n = 1) %>% 
  select("Species" = sp_name, "Predictor" = parameter, "Duration" = duration, "Correlation" = correlation, "P-Value" = p.value)

knitr::kable(hind_preds, align = "c")
```

```{r}
hind3_data = hind2_data %>% # Contains data for species that won't change from scenario 2
  filter(!(species %in% corr_vals$sp_name))

preds_to_pull = hind_preds %>%  
  select(Species, Predictor, Duration) 

for(i in 1:length(preds_to_pull$Species)){
  
  duration = preds_to_pull$Duration[i]
  
  if(duration == 0){ #The prior day temperature metrics should be used
    
    predictors = hind_daily_temp_data %>% 
      mutate(date = date) 
    
    parameter = "mean_temp" #using mean temperature as a proxy for collection temp
    
    model_data = full_data %>%
      filter(sp_name %in% preds_to_pull$Species[i]) %>% 
      filter(sex == "female") %>% 
      mutate(collection_date = as_date(collection_date)) %>% 
      inner_join(predictors, join_by(collection_date == date)) %>%  
      select(ctmax, contains(parameter))
    
    if(dim(model_data)[2] == 2){
      hind.model = lm(data = model_data, 
                      ctmax ~ .)
      
      sp_data = predictors %>% 
        select(date, contains(parameter)) %>% 
        mutate(pred_ctmax = predict(hind.model, newdata = .)) %>%  
        select(date, pred_ctmax) %>% 
        inner_join(hind_daily_temp_data, by = c("date")) %>% 
        mutate("species" = preds_to_pull$Species[i],
               "doy" = yday(date),
               lim_diff = pred_ctmax - max_temp) %>% 
        select(date, daily_mean = mean_temp, daily_max = max_temp, species, pred_ctmax, lim_diff, doy)
      
      hind3_data = bind_rows(hind3_data, sp_data)
    }else{
      print(c(unique(sp_data$species), "Too many columns selected"))
    }
    
  }
  
  if(duration > 0){
    #Neither the prior day nor day of metrics should be used; use duration as n_days
    
    predictors = get_predictors(daily_values = hind_daily_temp_data, 
                                raw_temp = hind_temp_data, 
                                n_days = duration)
    
    parameter = preds_to_pull$Predictor[i]
    
    model_data = full_data %>%
      filter(sp_name %in% preds_to_pull$Species[i]) %>% 
      filter(sex == "female") %>% 
      mutate(collection_date = as_date(collection_date)) %>% 
      left_join(predictors, join_by(collection_date == date)) %>%  
      select(ctmax, contains(paste("day_", parameter, sep = "")))
    
    if(dim(model_data)[2] == 2){
      hind.model = lm(data = model_data, 
                      ctmax ~ .)
      
      sp_data = predictors %>% 
        select(date, contains(parameter)) %>% 
        mutate(pred_ctmax = predict(hind.model, newdata = .)) %>%  
        select(date, pred_ctmax) %>% 
        inner_join(hind_daily_temp_data, by = c("date")) %>% 
        mutate("species" = preds_to_pull$Species[i],
               "doy" = yday(date),
               lim_diff = pred_ctmax - max_temp) %>% 
        select(date, daily_mean = mean_temp, daily_max = max_temp, species, pred_ctmax, lim_diff, doy)
      
      hind3_data = bind_rows(hind3_data, sp_data)
      
    }else{
      print(c(unique(sp_data$species), "Too many columns selected"))
    }
    
  }
}


hind3_data = hind3_data %>% 
  mutate("method" = "Variable_acclimation")
```

This third approach did not affect the predicted patterns in *Limnocalanus* or *Senecella* (neither species has been observed in enough collections to estimate the effects of different environmental factors). Changing the acclimation approach did affect patterns in thermal limits in the other species though. The figure below shows how predicted warming tolerance varies over the year in the seven species, based on the three different prediction methods. In general, constant thermal limits (the 'no acclimation' method) resulted in larger warming tolerance during the winter and lower warming tolerance during the summer, although this effect was small in most species.     
```{r supp-fig-hind_cast_summary, fig.width=13, fig.height=10}
synthesis = bind_rows(
  select(hind1_data, date, doy, daily_mean, daily_max, species, "pred_ctmax" = mean_ctmax, lim_diff, method),
  select(hind2_data, date, doy, daily_mean, daily_max,  species, pred_ctmax, lim_diff, method),
  select(hind3_data, date, doy, daily_mean, daily_max,  species, pred_ctmax, lim_diff, method)) %>% 
  mutate(method = fct_relevel(method, "No_acclimation", "Constant_acclimation", "Variable_acclimation")) %>% 
  filter(!(species == "Osphranticum labronectum" & method == "Variable_acclimation"))

climatology = synthesis %>% 
  group_by(species, doy, method) %>%  
  summarise("mean_diff" = mean(lim_diff),
            "min_diff" = min(lim_diff),
            "max_diff" = max(lim_diff)) %>% 
  mutate(method = fct_relevel(method, "No_acclimation", "Constant_acclimation", "Variable_acclimation"))

acc_effects = synthesis %>% 
  pivot_wider(id_cols = c(date, species, doy), 
              names_from = method, 
              values_from = lim_diff) %>%  
  mutate("const_acc_effect" = Constant_acclimation - No_acclimation,
         "var_acc_effect" = Variable_acclimation - No_acclimation)

ggplot(synthesis, aes(x = doy, y = lim_diff, colour = method)) + 
  facet_wrap(species~.) + 
  geom_hline(yintercept = 0) + 
  geom_hline(yintercept = 5, colour = "grey") + 
  geom_point(alpha = 0.1) + 
  labs(x = "Day of Year", 
       y = "Predicted Warming Tolerance (°C Above Daily Max)") + 
  guides(colour = guide_legend(override.aes = list(alpha = 1))) + 
  theme_matt_facets(base_size = 18) + 
  theme(strip.text.x.top = element_text(size = 10))
```

```{r supp-fig-acc_scenario_effects, fig.width=9, fig.height=7}
wt_hindcast_summary = synthesis %>%  
  mutate("year" = year(date)) %>% 
  group_by(species, year, method) %>% 
  summarise("min_wt" = min(lim_diff),
            "max_wt" = max(lim_diff)) %>% 
  pivot_longer(cols = c(min_wt, max_wt), 
               names_to = "metric", 
               values_to = "wt") %>% 
  group_by(species, method, metric) %>% 
  summarise("mean_wt" = mean(wt))

wt_hindcast_summary %>% 
  filter(metric == "min_wt") %>% 
ggplot(aes(x = method, y = mean_wt, group = species, colour = species)) + 
  #facet_wrap(.~metric, scales = "free_y") + 
  geom_hline(yintercept = 0) + 
  geom_hline(yintercept = 5) + 
  geom_line(linewidth = 2) + 
  geom_point(size = 3) + 
  labs(x = "Scenario", 
       y = "Mean Yearly Minimum WT (°C)") + 
  scale_color_manual(values = species_cols) + 
  theme_matt_facets() + 
  theme(axis.text.x = element_text(angle = 300, hjust = 0, vjust = 0.5))
```
